/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like a partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/offset.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>

        .code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL-based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
        movq %gs:pda_oldrsp,\tmp
        movq \tmp,RSP(%rsp)
        movq $__USER_DS,SS(%rsp)
        movq $__USER_CS,CS(%rsp)
        movq $-1,RCX(%rsp)
        movq R11(%rsp),\tmp     /* get eflags */
        movq \tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
        movq RSP-\offset(%rsp),\tmp
        movq \tmp,%gs:pda_oldrsp
        movq EFLAGS-\offset(%rsp),\tmp
        movq \tmp,R11-\offset(%rsp)
.endm
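
/*
 * Both macros are used on the syscall fast path below: tracesys,
 * ptregscall_common, stub_execve and stub_rt_sigreturn do
 * FIXUP_TOP_OF_STACK before handing a pt_regs pointer to C code, and
 * the paths that return via SYSRET do RESTORE_TOP_OF_STACK afterwards.
 */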
.macro FAKE_STACK_FRAME child_rip
        /* push in order ss, rsp, eflags, cs, rip */
        xorl %eax, %eax
        pushq %rax              /* ss */
        CFI_ADJUST_CFA_OFFSET 8
        pushq %rax              /* rsp */
        CFI_ADJUST_CFA_OFFSET 8
        CFI_OFFSET rip,0
        pushq $(1<<9)           /* eflags - interrupts on */
        CFI_ADJUST_CFA_OFFSET 8
        pushq $__KERNEL_CS      /* cs */
        CFI_ADJUST_CFA_OFFSET 8
        pushq \child_rip        /* rip */
        CFI_ADJUST_CFA_OFFSET 8
        CFI_OFFSET rip,0
        pushq %rax              /* orig rax */
        CFI_ADJUST_CFA_OFFSET 8
.endm

.macro UNFAKE_STACK_FRAME
        addq $8*6, %rsp
        CFI_ADJUST_CFA_OFFSET -(6*8)
.endm
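
/*
 * For reference, after FAKE_STACK_FRAME \child_rip the six words pushed
 * above sit on the stack like this (derived directly from the macro):
 *       0(%rsp)  orig rax  (0)
 *       8(%rsp)  rip       (\child_rip)
 *      16(%rsp)  cs        (__KERNEL_CS)
 *      24(%rsp)  eflags    (IF set)
 *      32(%rsp)  rsp       (0)
 *      40(%rsp)  ss        (0)
 * UNFAKE_STACK_FRAME simply pops these 6*8 bytes again.
 */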
.macro CFI_DEFAULT_STACK
        CFI_ADJUST_CFA_OFFSET (SS)
        CFI_OFFSET r15,R15-SS
        CFI_OFFSET r14,R14-SS
        CFI_OFFSET r13,R13-SS
        CFI_OFFSET r12,R12-SS
        CFI_OFFSET rbp,RBP-SS
        CFI_OFFSET rbx,RBX-SS
        CFI_OFFSET r11,R11-SS
        CFI_OFFSET r10,R10-SS
        CFI_OFFSET r9,R9-SS
        CFI_OFFSET r8,R8-SS
        CFI_OFFSET rax,RAX-SS
        CFI_OFFSET rcx,RCX-SS
        CFI_OFFSET rdx,RDX-SS
        CFI_OFFSET rsi,RSI-SS
        CFI_OFFSET rdi,RDI-SS
        CFI_OFFSET rsp,RSP-SS
        CFI_OFFSET rip,RIP-SS
.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi: prev */
ENTRY(ret_from_fork)
        CFI_STARTPROC
        CFI_DEFAULT_STACK
        call schedule_tail
        GET_THREAD_INFO(%rcx)
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
        jnz rff_trace
rff_action:
        RESTORE_REST
        testl $3,CS-ARGOFFSET(%rsp)     # from kernel_thread?
        je int_ret_from_sys_call
        testl $_TIF_IA32,threadinfo_flags(%rcx)
        jnz int_ret_from_sys_call
        RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
        jmp ret_from_sys_call
rff_trace:
        movq %rsp,%rdi
        call syscall_trace_leave
        GET_THREAD_INFO(%rcx)
        jmp rff_action
        CFI_ENDPROC
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3    (--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX  if we had a free scratch register we could save the RSP into the stack
 *      frame and report it properly in ps. Unfortunately we haven't.
 */
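
/*
 * Illustrative sketch, not part of the kernel source: a user-space caller
 * that follows the convention above, e.g. write(fd, buf, count), would do
 * roughly (buf and count are placeholder symbols):
 *
 *      movq $__NR_write,%rax           # system call number
 *      movq $1,%rdi                    # arg0: fd
 *      leaq buf(%rip),%rsi             # arg1: buf
 *      movq $count,%rdx                # arg2: count
 *      syscall                         # rcx := return RIP, r11 := eflags
 *
 * On return %rax holds the result; %rcx and %r11 are clobbered as described
 * above, which is why they are stashed into the stack frame below.
 */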
ENTRY(system_call)
        CFI_STARTPROC
        swapgs
        movq %rsp,%gs:pda_oldrsp
        movq %gs:pda_kernelstack,%rsp
        sti
        SAVE_ARGS 8,1
        movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
        movq %rcx,RIP-ARGOFFSET(%rsp)
        GET_THREAD_INFO(%rcx)
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
        jnz tracesys
        cmpq $__NR_syscall_max,%rax
        ja badsys
        movq %r10,%rcx
        call *sys_call_table(,%rax,8)   # XXX: rip relative
        movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
        .globl ret_from_sys_call
ret_from_sys_call:
        movl $_TIF_ALLWORK_MASK,%edi
        /* edi: flagmask */
sysret_check:
        GET_THREAD_INFO(%rcx)
        cli
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        jnz sysret_careful
        movq RIP-ARGOFFSET(%rsp),%rcx
        RESTORE_ARGS 0,-ARG_SKIP,1
        movq %gs:pda_oldrsp,%rsp
        swapgs
        sysretq

        /* Handle reschedules */
        /* edx: work, edi: workmask */
sysret_careful:
        bt $TIF_NEED_RESCHED,%edx
        jnc sysret_signal
        sti
        pushq %rdi
        call schedule
        popq %rdi
        jmp sysret_check

        /* Handle a signal */
sysret_signal:
        sti
        testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        jz 1f
        /* Really a signal */
        /* edx: work flags (arg3) */
        leaq do_notify_resume(%rip),%rax
        leaq -ARGOFFSET(%rsp),%rdi      # &pt_regs -> arg1
        xorl %esi,%esi                  # oldset -> arg2
        call ptregscall_common
1:      movl $_TIF_NEED_RESCHED,%edi
        jmp sysret_check

        /* Do syscall tracing */
tracesys:
        SAVE_REST
        movq $-ENOSYS,RAX(%rsp)
        FIXUP_TOP_OF_STACK %rdi
        movq %rsp,%rdi
        call syscall_trace_enter
        LOAD_ARGS ARGOFFSET     /* reload args from stack in case ptrace changed it */
        RESTORE_REST
        cmpq $__NR_syscall_max,%rax
        ja 1f
        movq %r10,%rcx          /* fixup for C */
        call *sys_call_table(,%rax,8)
        movq %rax,RAX-ARGOFFSET(%rsp)
1:      SAVE_REST
        movq %rsp,%rdi
        call syscall_trace_leave
        RESTORE_TOP_OF_STACK %rbx
        RESTORE_REST
        jmp ret_from_sys_call

badsys:
        movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
        jmp ret_from_sys_call
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
        cli
        testl $3,CS-ARGOFFSET(%rsp)
        je retint_restore_args
        movl $_TIF_ALLWORK_MASK,%edi
        /* edi: mask to check */
int_with_check:
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        jnz int_careful
        jmp retint_swapgs

        /* Either reschedule or signal or syscall exit tracking needed. */
        /* First do a reschedule test. */
        /* edx: work, edi: workmask */
int_careful:
        bt $TIF_NEED_RESCHED,%edx
        jnc int_very_careful
        sti
        pushq %rdi
        call schedule
        popq %rdi
        cli
        jmp int_with_check

        /* handle signals and tracing -- both require a full stack frame */
int_very_careful:
        sti
        SAVE_REST
        /* Check for syscall exit trace */
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
        jz int_signal
        pushq %rdi
        leaq 8(%rsp),%rdi       # &ptregs -> arg1
        call syscall_trace_leave
        popq %rdi
        andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
        cli
        jmp int_restore_rest

int_signal:
        testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
        jz 1f
        movq %rsp,%rdi          # &ptregs -> arg1
        xorl %esi,%esi          # oldset -> arg2
        call do_notify_resume
1:      movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
        RESTORE_REST
        cli
        jmp int_with_check
        CFI_ENDPROC
/*
 * Certain special system calls need to save a complete full stack frame.
 */
.macro PTREGSCALL label,func,arg
        .globl \label
\label:
        leaq \func(%rip),%rax
        leaq -ARGOFFSET+8(%rsp),\arg    /* 8 for return address */
        jmp ptregscall_common
.endm
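
/*
 * For example, the first invocation below,
 *      PTREGSCALL stub_clone, sys_clone, %r8
 * expands to roughly:
 *      .globl stub_clone
 * stub_clone:
 *      leaq sys_clone(%rip),%rax       # handler for ptregscall_common to call
 *      leaq -ARGOFFSET+8(%rsp),%r8     # pass &pt_regs in %r8 (sys_clone's pt_regs arg)
 *      jmp ptregscall_common
 */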
        PTREGSCALL stub_clone, sys_clone, %r8
        PTREGSCALL stub_fork, sys_fork, %rdi
        PTREGSCALL stub_vfork, sys_vfork, %rdi
        PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
        PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
        PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
        CFI_STARTPROC
        popq %r11
        CFI_ADJUST_CFA_OFFSET -8
        SAVE_REST
        movq %r11, %r15
        FIXUP_TOP_OF_STACK %r11
        call *%rax
        RESTORE_TOP_OF_STACK %r11
        movq %r15, %r11
        RESTORE_REST
        pushq %r11
        CFI_ADJUST_CFA_OFFSET 8
        ret
        CFI_ENDPROC

ENTRY(stub_execve)
        CFI_STARTPROC
        popq %r11
        CFI_ADJUST_CFA_OFFSET -8
        SAVE_REST
        movq %r11, %r15
        FIXUP_TOP_OF_STACK %r11
        call sys_execve
        GET_THREAD_INFO(%rcx)
        bt $TIF_IA32,threadinfo_flags(%rcx)
        jc exec_32bit
        RESTORE_TOP_OF_STACK %r11
        movq %r15, %r11
        RESTORE_REST
        push %r11
        ret

exec_32bit:
        CFI_ADJUST_CFA_OFFSET REST_SKIP
        movq %rax,RAX(%rsp)
        RESTORE_REST
        jmp int_ret_from_sys_call
        CFI_ENDPROC

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
        CFI_STARTPROC
        addq $8, %rsp
        SAVE_REST
        movq %rsp,%rdi
        FIXUP_TOP_OF_STACK %r11
        call sys_rt_sigreturn
        movq %rax,RAX(%rsp)     # fixme, this could be done at the higher layer
        RESTORE_REST
        jmp int_ret_from_sys_call
        CFI_ENDPROC
/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
.macro interrupt func
        CFI_STARTPROC simple
        CFI_DEF_CFA rsp,(SS-RDI)
        CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
        CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
        cld
#ifdef CONFIG_DEBUG_INFO
        SAVE_ALL
        movq %rsp,%rdi
        /*
         * Set up a stack frame pointer. This allows gdb to trace
         * back to the original stack.
         */
        movq %rsp,%rbp
        CFI_DEF_CFA_REGISTER rbp
#else
        SAVE_ARGS
        leaq -ARGOFFSET(%rsp),%rdi      # arg1 for handler
#endif
        testl $3,CS(%rdi)
        je 1f
        swapgs
1:      incl %gs:pda_irqcount           # RED-PEN should check preempt count
        movq %gs:pda_irqstackptr,%rax
        cmoveq %rax,%rsp
        pushq %rdi                      # save old stack
        call \func
.endm
ENTRY(common_interrupt)
        interrupt do_IRQ
        /* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
        popq %rdi
        cli
        decl %gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
        movq RBP(%rdi),%rbp
#endif
        leaq ARGOFFSET(%rdi),%rsp
exit_intr:
        GET_THREAD_INFO(%rcx)
        testl $3,CS-ARGOFFSET(%rsp)
        je retint_kernel

        /* Interrupt came from user space */
        /*
         * Has a correct top of stack, but a partial stack frame
         * %rcx: thread info. Interrupts off.
         */
retint_with_reschedule:
        movl $_TIF_WORK_MASK,%edi
retint_check:
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        jnz retint_careful
retint_swapgs:
        swapgs
retint_restore_args:
        cli
        RESTORE_ARGS 0,8,0
iret_label:
        iretq

        .section __ex_table,"a"
        .quad iret_label,bad_iret
        .previous
        .section .fixup,"ax"
        /* force a signal here? this matches i386 behaviour */
        /* running with kernel gs */
bad_iret:
        movq $-9999,%rdi        /* better code? */
        jmp do_exit
        .previous

        /* edi: workmask, edx: work */
retint_careful:
        bt $TIF_NEED_RESCHED,%edx
        jnc retint_signal
        sti
        pushq %rdi
        call schedule
        popq %rdi
        GET_THREAD_INFO(%rcx)
        cli
        jmp retint_check

retint_signal:
        testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        jz retint_swapgs
        sti
        SAVE_REST
        movq $-1,ORIG_RAX(%rsp)
        xorl %esi,%esi          # oldset
        movq %rsp,%rdi          # &pt_regs
        call do_notify_resume
        RESTORE_REST
        cli
        movl $_TIF_NEED_RESCHED,%edi
        GET_THREAD_INFO(%rcx)
        jmp retint_check

#ifdef CONFIG_PREEMPT
        /* Returning to kernel space. Check if we need preemption */
        /* rcx: threadinfo. interrupts off. */
        .p2align
retint_kernel:
        cmpl $0,threadinfo_preempt_count(%rcx)
        jnz retint_restore_args
        bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
        jnc retint_restore_args
        bt $9,EFLAGS-ARGOFFSET(%rsp)    /* interrupts off? */
        jnc retint_restore_args
        call preempt_schedule_irq
        jmp exit_intr
#endif
        CFI_ENDPROC
/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
        pushq $\num-256
        interrupt \func
        jmp ret_from_intr
        CFI_ENDPROC
.endm
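
/*
 * For example, the thermal interrupt entry below,
 *      apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
 * expands to roughly:
 *      pushq $THERMAL_APIC_VECTOR-256  # vector number, biased by -256
 *      interrupt smp_thermal_interrupt # save regs, switch stack, call handler
 *      jmp ret_from_intr
 *      CFI_ENDPROC
 */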
ENTRY(thermal_interrupt)
        apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
        apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

ENTRY(invalidate_interrupt)
        apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt

ENTRY(call_function_interrupt)
        apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
        apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
        apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
        apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif
/*
 * Exception entry points.
 */
.macro zeroentry sym
        pushq $0        /* push error code/oldrax */
        pushq %rax      /* push real oldrax to the rdi slot */
        leaq \sym(%rip),%rax
        jmp error_entry
.endm

.macro errorentry sym
        pushq %rax
        leaq \sym(%rip),%rax
        jmp error_entry
.endm
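
/*
 * zeroentry is for exceptions where the CPU pushes no error code (a 0 is
 * pushed in its place); errorentry is for those where it already has.
 * E.g. "zeroentry do_overflow" below becomes roughly:
 *      pushq $0                        # fake error code / oldrax
 *      pushq %rax                      # real oldrax into the rdi slot
 *      leaq do_overflow(%rip),%rax     # handler for error_entry to call
 *      jmp error_entry
 */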
/* error code is on the stack already */
/* handle NMI-like exceptions that can happen everywhere */
.macro paranoidentry sym
        SAVE_ALL
        cld
        movl $1,%ebx
        movl $MSR_GS_BASE,%ecx
        rdmsr
        testl %edx,%edx
        js 1f
        swapgs
        xorl %ebx,%ebx
1:      movq %rsp,%rdi
        movq ORIG_RAX(%rsp),%rsi
        movq $-1,ORIG_RAX(%rsp)
        call \sym
        cli
.endm
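
/*
 * Note on the MSR_GS_BASE check above: when the exception hit kernel code,
 * the GS base already holds the kernel PDA address, whose high dword has the
 * sign bit set, so %edx tests negative, no swapgs is done and %ebx stays 1;
 * otherwise swapgs is performed and %ebx is cleared. paranoid_exit below
 * uses %ebx as its "no swapgs needed" flag.
 */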
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
        CFI_STARTPROC simple
        CFI_DEF_CFA rsp,(SS-RDI)
        CFI_REL_OFFSET rsp,(RSP-RDI)
        CFI_REL_OFFSET rip,(RIP-RDI)
        /* rdi slot contains rax, oldrax contains error code */
        cld
        subq $14*8,%rsp
        CFI_ADJUST_CFA_OFFSET (14*8)
        movq %rsi,13*8(%rsp)
        CFI_REL_OFFSET rsi,RSI
        movq 14*8(%rsp),%rsi    /* load rax from rdi slot */
        movq %rdx,12*8(%rsp)
        CFI_REL_OFFSET rdx,RDX
        movq %rcx,11*8(%rsp)
        CFI_REL_OFFSET rcx,RCX
        movq %rsi,10*8(%rsp)    /* store rax */
        CFI_REL_OFFSET rax,RAX
        movq %r8, 9*8(%rsp)
        CFI_REL_OFFSET r8,R8
        movq %r9, 8*8(%rsp)
        CFI_REL_OFFSET r9,R9
        movq %r10,7*8(%rsp)
        CFI_REL_OFFSET r10,R10
        movq %r11,6*8(%rsp)
        CFI_REL_OFFSET r11,R11
        movq %rbx,5*8(%rsp)
        CFI_REL_OFFSET rbx,RBX
        movq %rbp,4*8(%rsp)
        CFI_REL_OFFSET rbp,RBP
        movq %r12,3*8(%rsp)
        CFI_REL_OFFSET r12,R12
        movq %r13,2*8(%rsp)
        CFI_REL_OFFSET r13,R13
        movq %r14,1*8(%rsp)
        CFI_REL_OFFSET r14,R14
        movq %r15,(%rsp)
        CFI_REL_OFFSET r15,R15
        xorl %ebx,%ebx
        testl $3,CS(%rsp)
        je error_kernelspace
error_swapgs:
        swapgs
error_sti:
        movq %rdi,RDI(%rsp)
        movq %rsp,%rdi
        movq ORIG_RAX(%rsp),%rsi        /* get error code */
        movq $-1,ORIG_RAX(%rsp)
        call *%rax
        /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
        movl %ebx,%eax
        RESTORE_REST
        cli
        GET_THREAD_INFO(%rcx)
        testl %eax,%eax
        jne retint_kernel
        movl threadinfo_flags(%rcx),%edx
        movl $_TIF_WORK_MASK,%edi
        andl %edi,%edx
        jnz retint_careful
        swapgs
        RESTORE_ARGS 0,8,0
        iretq
        CFI_ENDPROC
error_kernelspace:
        incl %ebx
        /*
         * There are two places in the kernel that can potentially fault with
         * usergs. Handle them here. The exception handlers after iret run
         * with kernel gs again, so don't set the user space flag.
         * B-stepping K8s sometimes report a truncated RIP for IRET exceptions
         * returning to compat mode. Check for these here too.
         */
        leaq iret_label(%rip),%rbp
        cmpq %rbp,RIP(%rsp)
        je error_swapgs
        movl %ebp,%ebp          /* zero extend */
        cmpq %rbp,RIP(%rsp)
        je error_swapgs
        cmpq $gs_change,RIP(%rsp)
        je error_swapgs
        jmp error_sti
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(load_gs_index)
        pushf
        cli
        swapgs
gs_change:
        movl %edi,%gs
2:      mfence          /* workaround */
        swapgs
        popf
        ret

        .section __ex_table,"a"
        .align 8
        .quad gs_change,bad_gs
        .previous
        .section .fixup,"ax"
        /* running with kernelgs */
bad_gs:
        swapgs          /* switch back to user gs */
        xorl %eax,%eax
        movl %eax,%gs
        jmp 2b
        .previous
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *      extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *      rdi: fn, rsi: arg, rdx: flags
 */
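
/*
 * Illustrative caller sketch, not part of the original file (my_thread_fn
 * and my_arg are hypothetical symbols): following the interface above, a
 * kernel caller would do roughly
 *      leaq my_thread_fn(%rip),%rdi    # fn
 *      leaq my_arg(%rip),%rsi          # arg
 *      movq $CLONE_FILES,%rdx          # flags (example clone flag)
 *      call kernel_thread
 * and get the new thread's pid (or a negative errno) back in %rax.
 */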
ENTRY(kernel_thread)
        CFI_STARTPROC
        FAKE_STACK_FRAME $child_rip
        SAVE_ALL

        # rdi: flags, rsi: usp, rdx: will be &pt_regs
        movq %rdx,%rdi
        orq kernel_thread_flags(%rip),%rdi
        movq $-1, %rsi
        movq %rsp, %rdx

        xorl %r8d,%r8d
        xorl %r9d,%r9d

        # clone now
        call do_fork
        movq %rax,RAX(%rsp)
        xorl %edi,%edi

        /*
         * It isn't worth checking for a reschedule here, so internally to
         * the x86_64 port you can rely on kernel_thread() not rescheduling
         * the child before returning; this avoids the need for hacks, for
         * example to fork off the per-CPU idle tasks.
         * [Hopefully no generic code relies on the reschedule -AK]
         */
        RESTORE_ALL
        UNFAKE_STACK_FRAME
        ret
        CFI_ENDPROC

child_rip:
        /*
         * Here we are in the child and the registers are set as they were
         * at kernel_thread() invocation in the parent.
         */
        movq %rdi, %rax
        movq %rsi, %rdi
        call *%rax
        # exit
        xorl %edi, %edi
        call do_exit
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *      extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *      rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *      extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *      rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
        CFI_STARTPROC
        FAKE_STACK_FRAME $0
        SAVE_ALL
        call sys_execve
        movq %rax, RAX(%rsp)
        RESTORE_REST
        testq %rax,%rax
        je int_ret_from_sys_call
        RESTORE_ARGS
        UNFAKE_STACK_FRAME
        ret
        CFI_ENDPROC

KPROBE_ENTRY(page_fault)
        errorentry do_page_fault
        .previous .text

ENTRY(coprocessor_error)
        zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
        zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
        zeroentry math_state_restore

        /* runs on exception stack */
KPROBE_ENTRY(debug)
        CFI_STARTPROC
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_debug
        jmp paranoid_exit
        CFI_ENDPROC
        .previous .text

        /* runs on exception stack */
ENTRY(nmi)
        CFI_STARTPROC
        pushq $-1
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_nmi
        /*
         * "Paranoid" exit path from exception stack.
         * Paranoid because this is used by NMIs and cannot take
         * any kernel state for granted.
         * We don't do kernel preemption checks here, because only
         * NMI should be common and it does not enable IRQs and
         * cannot get reschedule ticks.
         */
        /* ebx: no swapgs flag */
paranoid_exit:
        testl %ebx,%ebx                 /* swapgs needed? */
        jnz paranoid_restore
        testl $3,CS(%rsp)
        jnz paranoid_userspace
paranoid_swapgs:
        swapgs
paranoid_restore:
        RESTORE_ALL 8
        iretq
paranoid_userspace:
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%ebx
        andl $_TIF_WORK_MASK,%ebx
        jz paranoid_swapgs
        movq %rsp,%rdi                  /* &pt_regs */
        call sync_regs
        movq %rax,%rsp                  /* switch stack for scheduling */
        testl $_TIF_NEED_RESCHED,%ebx
        jnz paranoid_schedule
        movl %ebx,%edx                  /* arg3: thread flags */
        sti
        xorl %esi,%esi                  /* arg2: oldset */
        movq %rsp,%rdi                  /* arg1: &pt_regs */
        call do_notify_resume
        cli
        jmp paranoid_userspace
paranoid_schedule:
        sti
        call schedule
        cli
        jmp paranoid_userspace
        CFI_ENDPROC

KPROBE_ENTRY(int3)
        zeroentry do_int3
        .previous .text

ENTRY(overflow)
        zeroentry do_overflow

ENTRY(bounds)
        zeroentry do_bounds

ENTRY(invalid_op)
        zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
        zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
        zeroentry do_reserved

        /* runs on exception stack */
ENTRY(double_fault)
        CFI_STARTPROC
        paranoidentry do_double_fault
        jmp paranoid_exit
        CFI_ENDPROC

ENTRY(invalid_TSS)
        errorentry do_invalid_TSS

ENTRY(segment_not_present)
        errorentry do_segment_not_present

        /* runs on exception stack */
ENTRY(stack_segment)
        CFI_STARTPROC
        paranoidentry do_stack_segment
        jmp paranoid_exit
        CFI_ENDPROC

KPROBE_ENTRY(general_protection)
        errorentry do_general_protection
        .previous .text

ENTRY(alignment_check)
        errorentry do_alignment_check

ENTRY(divide_error)
        zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
        zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
        /* runs on exception stack */
ENTRY(machine_check)
        CFI_STARTPROC
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_machine_check
        jmp paranoid_exit
        CFI_ENDPROC
#endif

ENTRY(call_debug)
        zeroentry do_call_debug

ENTRY(call_softirq)
        movq %gs:pda_irqstackptr,%rax
        pushq %r15
        movq %rsp,%r15
        incl %gs:pda_irqcount
        cmove %rax,%rsp
        call __do_softirq
        movq %r15,%rsp
        decl %gs:pda_irqcount
        popq %r15
        ret