
/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *
 * $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens after every
 * interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals, or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: the architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like a partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/offset.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>

        .code64

#ifdef CONFIG_PREEMPT
#define preempt_stop cli
#else
#define preempt_stop
#define retint_kernel retint_restore_args
#endif
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
        movq    %gs:pda_oldrsp,\tmp
        movq    \tmp,RSP(%rsp)
        movq    $__USER_DS,SS(%rsp)
        movq    $__USER_CS,CS(%rsp)
        movq    $-1,RCX(%rsp)
        movq    R11(%rsp),\tmp  /* get eflags */
        movq    \tmp,EFLAGS(%rsp)
.endm
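/*
 * Background for the macro above: SYSCALL saved the user RIP in %rcx and
 * the user RFLAGS in %r11, and the user %rsp only survives in
 * %gs:pda_oldrsp; no SS/CS/RSP/EFLAGS frame was pushed anywhere.
 * FIXUP_TOP_OF_STACK fills in the RSP, SS, CS and EFLAGS slots of pt_regs
 * from those sources (the RIP slot was already stored at syscall entry)
 * and sets RCX to -1 because the real user RCX was destroyed by SYSCALL
 * itself.
 */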
.macro RESTORE_TOP_OF_STACK tmp,offset=0
        movq    RSP-\offset(%rsp),\tmp
        movq    \tmp,%gs:pda_oldrsp
        movq    EFLAGS-\offset(%rsp),\tmp
        movq    \tmp,R11-\offset(%rsp)
.endm

.macro FAKE_STACK_FRAME child_rip
        /* push in order ss, rsp, eflags, cs, rip */
        xorq    %rax, %rax
        pushq   %rax            /* ss */
        CFI_ADJUST_CFA_OFFSET   8
        pushq   %rax            /* rsp */
        CFI_ADJUST_CFA_OFFSET   8
        CFI_OFFSET      rip,0
        pushq   $(1<<9)         /* eflags - interrupts on */
        CFI_ADJUST_CFA_OFFSET   8
        pushq   $__KERNEL_CS    /* cs */
        CFI_ADJUST_CFA_OFFSET   8
        pushq   \child_rip      /* rip */
        CFI_ADJUST_CFA_OFFSET   8
        CFI_OFFSET      rip,0
        pushq   %rax            /* orig rax */
        CFI_ADJUST_CFA_OFFSET   8
.endm
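/*
 * FAKE_STACK_FRAME pushes six words, mimicking the frame the CPU builds
 * for an interrupt, topped with an orig_rax slot:
 *
 *      ss = 0, rsp = 0, eflags = IF, cs = __KERNEL_CS,
 *      rip = \child_rip, orig_rax = 0
 *
 * UNFAKE_STACK_FRAME below pops exactly these 6*8 bytes again.
 */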
.macro UNFAKE_STACK_FRAME
        addq    $8*6, %rsp
        CFI_ADJUST_CFA_OFFSET   -(6*8)
.endm

.macro CFI_DEFAULT_STACK
        CFI_ADJUST_CFA_OFFSET   (SS)
        CFI_OFFSET      r15,R15-SS
        CFI_OFFSET      r14,R14-SS
        CFI_OFFSET      r13,R13-SS
        CFI_OFFSET      r12,R12-SS
        CFI_OFFSET      rbp,RBP-SS
        CFI_OFFSET      rbx,RBX-SS
        CFI_OFFSET      r11,R11-SS
        CFI_OFFSET      r10,R10-SS
        CFI_OFFSET      r9,R9-SS
        CFI_OFFSET      r8,R8-SS
        CFI_OFFSET      rax,RAX-SS
        CFI_OFFSET      rcx,RCX-SS
        CFI_OFFSET      rdx,RDX-SS
        CFI_OFFSET      rsi,RSI-SS
        CFI_OFFSET      rdi,RDI-SS
        CFI_OFFSET      rsp,RSP-SS
        CFI_OFFSET      rip,RIP-SS
.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi: prev */
ENTRY(ret_from_fork)
        CFI_STARTPROC
        CFI_DEFAULT_STACK
        call    schedule_tail
        GET_THREAD_INFO(%rcx)
        testl   $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
        jnz     rff_trace
rff_action:
        RESTORE_REST
        testl   $3,CS-ARGOFFSET(%rsp)   # from kernel_thread?
        je      int_ret_from_sys_call
        testl   $_TIF_IA32,threadinfo_flags(%rcx)
        jnz     int_ret_from_sys_call
        RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
        jmp     ret_from_sys_call
rff_trace:
        movq    %rsp,%rdi
        call    syscall_trace_leave
        GET_THREAD_INFO(%rcx)
        jmp     rff_action
        CFI_ENDPROC
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3 (--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX if we had a free scratch register we could save the RSP into the stack
 * frame and report it properly in ps. Unfortunately we don't have one.
 */
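/*
 * Hardware side of SYSCALL, for reference: the CPU loads RIP from
 * MSR_LSTAR (which points here), saves the old RIP in %rcx and RFLAGS in
 * %r11, masks RFLAGS with MSR_SYSCALL_MASK, and loads the kernel CS/SS
 * from MSR_STAR. It does not switch stacks, which is why the first thing
 * done below is to stash the user %rsp in the PDA and load the
 * per-process kernel stack.
 */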
ENTRY(system_call)
        CFI_STARTPROC
        swapgs
        movq    %rsp,%gs:pda_oldrsp
        movq    %gs:pda_kernelstack,%rsp
        sti
        SAVE_ARGS 8,1
        movq    %rax,ORIG_RAX-ARGOFFSET(%rsp)
        movq    %rcx,RIP-ARGOFFSET(%rsp)
        GET_THREAD_INFO(%rcx)
        testl   $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
        jnz     tracesys
        cmpq    $__NR_syscall_max,%rax
        ja      badsys
        movq    %r10,%rcx
        call    *sys_call_table(,%rax,8)        # XXX: rip relative
        movq    %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
        .globl ret_from_sys_call
ret_from_sys_call:
        movl    $_TIF_WORK_MASK,%edi
        /* edi: flagmask */
sysret_check:
        GET_THREAD_INFO(%rcx)
        cli
        movl    threadinfo_flags(%rcx),%edx
        andl    %edi,%edx
        jnz     sysret_careful
        movq    RIP-ARGOFFSET(%rsp),%rcx
        RESTORE_ARGS 0,-ARG_SKIP,1
        movq    %gs:pda_oldrsp,%rsp
        swapgs
        sysretq
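/*
 * SYSRET is the mirror image of SYSCALL: it reloads RIP from %rcx (loaded
 * from the saved RIP slot above) and RFLAGS from %r11 (restored by
 * RESTORE_ARGS) and returns to user mode. That is why this fast path is
 * only usable when no work flags are set and the top of stack was never
 * made "real" - a full register image is not needed.
 */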
/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
        bt      $TIF_NEED_RESCHED,%edx
        jnc     sysret_signal
        sti
        pushq   %rdi
        call    schedule
        popq    %rdi
        jmp     sysret_check

/* Handle a signal */
sysret_signal:
        sti
        testl   $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        jz      1f
        /* Really a signal */
        /* edx: work flags (arg3) */
        leaq    do_notify_resume(%rip),%rax
        leaq    -ARGOFFSET(%rsp),%rdi   # &pt_regs -> arg1
        xorl    %esi,%esi               # oldset -> arg2
        call    ptregscall_common
1:      movl    $_TIF_NEED_RESCHED,%edi
        jmp     sysret_check

/* Do syscall tracing */
tracesys:
        SAVE_REST
        movq    $-ENOSYS,RAX(%rsp)
        FIXUP_TOP_OF_STACK %rdi
        movq    %rsp,%rdi
        call    syscall_trace_enter
        LOAD_ARGS ARGOFFSET     /* reload args from stack in case ptrace changed it */
        RESTORE_REST
        cmpq    $__NR_syscall_max,%rax
        ja      1f
        movq    %r10,%rcx       /* fixup for C */
        call    *sys_call_table(,%rax,8)
        movq    %rax,RAX-ARGOFFSET(%rsp)
1:      SAVE_REST
        movq    %rsp,%rdi
        call    syscall_trace_leave
        RESTORE_TOP_OF_STACK %rbx
        RESTORE_REST
        jmp     ret_from_sys_call

badsys:
        movq    $-ENOSYS,RAX-ARGOFFSET(%rsp)
        jmp     ret_from_sys_call
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
        cli
        testl   $3,CS-ARGOFFSET(%rsp)
        je      retint_restore_args
        movl    $_TIF_ALLWORK_MASK,%edi
        /* edi: mask to check */
int_with_check:
        GET_THREAD_INFO(%rcx)
        movl    threadinfo_flags(%rcx),%edx
        andl    %edi,%edx
        jnz     int_careful
        jmp     retint_swapgs

/* Either reschedule or signal or syscall exit tracking needed. */
/* First do a reschedule test. */
/* edx: work, edi: workmask */
int_careful:
        bt      $TIF_NEED_RESCHED,%edx
        jnc     int_very_careful
        sti
        pushq   %rdi
        call    schedule
        popq    %rdi
        jmp     int_with_check

/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
        sti
        SAVE_REST
        /* Check for syscall exit trace */
        testl   $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
        jz      int_signal
        pushq   %rdi
        leaq    8(%rsp),%rdi    # &ptregs -> arg1
        call    syscall_trace_leave
        popq    %rdi
        andl    $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
        jmp     int_restore_rest

int_signal:
        testl   $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
        jz      1f
        movq    %rsp,%rdi       # &ptregs -> arg1
        xorl    %esi,%esi       # oldset -> arg2
        call    do_notify_resume
1:      movl    $_TIF_NEED_RESCHED,%edi
int_restore_rest:
        RESTORE_REST
        jmp     int_with_check
        CFI_ENDPROC
/*
 * Certain special system calls need to save a complete stack frame.
 */
.macro PTREGSCALL label,func,arg
        .globl \label
\label:
        leaq    \func(%rip),%rax
        leaq    -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
        jmp     ptregscall_common
.endm

        PTREGSCALL stub_clone, sys_clone, %r8
        PTREGSCALL stub_fork, sys_fork, %rdi
        PTREGSCALL stub_vfork, sys_vfork, %rdi
        PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
        PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
        PTREGSCALL stub_iopl, sys_iopl, %rsi
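/*
 * Each PTREGSCALL stub loads the real handler into %rax and a pointer to
 * the (not yet complete) pt_regs into the register in which the C handler
 * expects its struct pt_regs argument, then falls into the common code
 * below, which pops the return address, completes the frame with
 * SAVE_REST/FIXUP_TOP_OF_STACK, calls the handler, and undoes the whole
 * dance before returning.
 */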
ENTRY(ptregscall_common)
        CFI_STARTPROC
        popq    %r11
        CFI_ADJUST_CFA_OFFSET   -8
        SAVE_REST
        movq    %r11, %r15
        FIXUP_TOP_OF_STACK %r11
        call    *%rax
        RESTORE_TOP_OF_STACK %r11
        movq    %r15, %r11
        RESTORE_REST
        pushq   %r11
        CFI_ADJUST_CFA_OFFSET   8
        ret
        CFI_ENDPROC

ENTRY(stub_execve)
        CFI_STARTPROC
        popq    %r11
        CFI_ADJUST_CFA_OFFSET   -8
        SAVE_REST
        movq    %r11, %r15
        FIXUP_TOP_OF_STACK %r11
        call    sys_execve
        GET_THREAD_INFO(%rcx)
        bt      $TIF_IA32,threadinfo_flags(%rcx)
        jc      exec_32bit
        RESTORE_TOP_OF_STACK %r11
        movq    %r15, %r11
        RESTORE_REST
        push    %r11
        ret

exec_32bit:
        CFI_ADJUST_CFA_OFFSET   REST_SKIP
        movq    %rax,RAX(%rsp)
        RESTORE_REST
        jmp     int_ret_from_sys_call
        CFI_ENDPROC
/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
        CFI_STARTPROC
        addq    $8, %rsp
        SAVE_REST
        movq    %rsp,%rdi
        FIXUP_TOP_OF_STACK %r11
        call    sys_rt_sigreturn
        movq    %rax,RAX(%rsp)  # fixme, this could be done at the higher layer
        RESTORE_REST
        jmp     int_ret_from_sys_call
        CFI_ENDPROC
/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
.macro interrupt func
        CFI_STARTPROC simple
        CFI_DEF_CFA     rsp,(SS-RDI)
        CFI_REL_OFFSET  rsp,(RSP-ORIG_RAX)
        CFI_REL_OFFSET  rip,(RIP-ORIG_RAX)
        cld
#ifdef CONFIG_DEBUG_INFO
        SAVE_ALL
        movq    %rsp,%rdi
        /*
         * Set up a stack frame pointer. This allows gdb to trace
         * back to the original stack.
         */
        movq    %rsp,%rbp
        CFI_DEF_CFA_REGISTER    rbp
#else
        SAVE_ARGS
        leaq    -ARGOFFSET(%rsp),%rdi   # arg1 for handler
#endif
        testl   $3,CS(%rdi)
        je      1f
        swapgs
1:      addl    $1,%gs:pda_irqcount     # RED-PEN should check preempt count
        movq    %gs:pda_irqstackptr,%rax
        cmoveq  %rax,%rsp
        pushq   %rdi            # save old stack
        call    \func
.endm
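/*
 * Note on the irq stack switch above: given the matching subl on the exit
 * path, pda_irqcount sits at -1 when no interrupt is in progress, so the
 * addl sets ZF only when the count becomes zero, i.e. for the outermost
 * interrupt. cmoveq therefore switches to the per-CPU irq stack exactly
 * once; nested interrupts keep running on it.
 */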
ENTRY(common_interrupt)
        interrupt do_IRQ
        /* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
        popq    %rdi
        cli
        subl    $1,%gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
        movq    RBP(%rdi),%rbp
#endif
        leaq    ARGOFFSET(%rdi),%rsp
exit_intr:
        GET_THREAD_INFO(%rcx)
        testl   $3,CS-ARGOFFSET(%rsp)
        je      retint_kernel
        /* Interrupt came from user space */
/*
 * Has a correct top of stack, but a partial stack frame
 * %rcx: thread info. Interrupts off.
 */
retint_with_reschedule:
        movl    $_TIF_WORK_MASK,%edi
retint_check:
        movl    threadinfo_flags(%rcx),%edx
        andl    %edi,%edx
        jnz     retint_careful
retint_swapgs:
        cli
        swapgs
retint_restore_args:
        cli
        RESTORE_ARGS 0,8,0
iret_label:
        iretq

        .section __ex_table,"a"
        .quad iret_label,bad_iret
        .previous
        .section .fixup,"ax"
        /* force a signal here? this matches i386 behaviour */
        /* running with kernel gs */
bad_iret:
        movq    $-9999,%rdi     /* better code? */
        jmp     do_exit
        .previous
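/*
 * The __ex_table entry above pairs the address of the iretq with bad_iret:
 * if the iretq itself faults (e.g. because user space set up bogus segment
 * state in a signal frame), the trap handler finds the pair in the
 * exception table and resumes at the fixup instead of oopsing.
 */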
/* edi: workmask, edx: work */
retint_careful:
        bt      $TIF_NEED_RESCHED,%edx
        jnc     retint_signal
        sti
        pushq   %rdi
        call    schedule
        popq    %rdi
        GET_THREAD_INFO(%rcx)
        cli
        jmp     retint_check

retint_signal:
        testl   $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        jz      retint_swapgs
        sti
        SAVE_REST
        movq    $-1,ORIG_RAX(%rsp)
        xorq    %rsi,%rsi       # oldset
        movq    %rsp,%rdi       # &pt_regs
        call    do_notify_resume
        RESTORE_REST
        cli
        movl    $_TIF_NEED_RESCHED,%edi
        GET_THREAD_INFO(%rcx)
        jmp     retint_check

#ifdef CONFIG_PREEMPT
        /* Returning to kernel space. Check if we need preemption */
        /* rcx: threadinfo. interrupts off. */
        .p2align
retint_kernel:
        cmpl    $0,threadinfo_preempt_count(%rcx)
        jnz     retint_restore_args
        bt      $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
        jnc     retint_restore_args
        bt      $9,EFLAGS-ARGOFFSET(%rsp)       /* interrupts off? */
        jnc     retint_restore_args
        call    preempt_schedule_irq
        jmp     exit_intr
#endif
        CFI_ENDPROC
/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
        pushq   $\num-256
        interrupt \func
        jmp     ret_from_intr
        CFI_ENDPROC
.endm

ENTRY(thermal_interrupt)
        apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
        apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
ENTRY(invalidate_interrupt)
        apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt
ENTRY(call_function_interrupt)
        apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
        apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
ENTRY(error_interrupt)
        apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
ENTRY(spurious_interrupt)
        apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif
/*
 * Exception entry points.
 */
.macro zeroentry sym
        pushq   $0      /* push error code/oldrax */
        pushq   %rax    /* push real oldrax to the rdi slot */
        leaq    \sym(%rip),%rax
        jmp     error_entry
.endm

.macro errorentry sym
        pushq   %rax
        leaq    \sym(%rip),%rax
        jmp     error_entry
.endm
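/*
 * The difference between the two: for some exceptions (e.g. page fault,
 * general protection) the CPU pushes a hardware error code before
 * vectoring here; for others it does not. zeroentry pushes a 0 in its
 * place so that error_entry always sees the same frame layout, with the
 * error code in the orig_rax slot.
 */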
/* error code is on the stack already */
/* handle NMI like exceptions that can happen everywhere */
.macro paranoidentry sym
        SAVE_ALL
        cld
        movl    $1,%ebx
        movl    $MSR_GS_BASE,%ecx
        rdmsr
        testl   %edx,%edx
        js      1f
        swapgs
        xorl    %ebx,%ebx
1:      movq    %rsp,%rdi
        movq    ORIG_RAX(%rsp),%rsi
        movq    $-1,ORIG_RAX(%rsp)
        call    \sym
.endm
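/*
 * The rdmsr trick: an NMI or machine check can fire before or after the
 * swapgs on any other entry path, so the current GS base is unknown. The
 * kernel GS base is a kernel-space address, which has the sign bit set in
 * its upper half (%edx after rdmsr), so "js 1f" means GS already points at
 * the kernel PDA; otherwise swapgs is done and %ebx is cleared to tell
 * the exit path to swap back.
 */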
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
        CFI_STARTPROC simple
        CFI_DEF_CFA     rsp,(SS-RDI)
        CFI_REL_OFFSET  rsp,(RSP-RDI)
        CFI_REL_OFFSET  rip,(RIP-RDI)
        /* rdi slot contains rax, oldrax contains error code */
        cld
        subq    $14*8,%rsp
        CFI_ADJUST_CFA_OFFSET   (14*8)
        movq    %rsi,13*8(%rsp)
        CFI_REL_OFFSET  rsi,RSI
        movq    14*8(%rsp),%rsi /* load rax from rdi slot */
        movq    %rdx,12*8(%rsp)
        CFI_REL_OFFSET  rdx,RDX
        movq    %rcx,11*8(%rsp)
        CFI_REL_OFFSET  rcx,RCX
        movq    %rsi,10*8(%rsp) /* store rax */
        CFI_REL_OFFSET  rax,RAX
        movq    %r8, 9*8(%rsp)
        CFI_REL_OFFSET  r8,R8
        movq    %r9, 8*8(%rsp)
        CFI_REL_OFFSET  r9,R9
        movq    %r10,7*8(%rsp)
        CFI_REL_OFFSET  r10,R10
        movq    %r11,6*8(%rsp)
        CFI_REL_OFFSET  r11,R11
        movq    %rbx,5*8(%rsp)
        CFI_REL_OFFSET  rbx,RBX
        movq    %rbp,4*8(%rsp)
        CFI_REL_OFFSET  rbp,RBP
        movq    %r12,3*8(%rsp)
        CFI_REL_OFFSET  r12,R12
        movq    %r13,2*8(%rsp)
        CFI_REL_OFFSET  r13,R13
        movq    %r14,1*8(%rsp)
        CFI_REL_OFFSET  r14,R14
        movq    %r15,(%rsp)
        CFI_REL_OFFSET  r15,R15
        xorl    %ebx,%ebx
        testl   $3,CS(%rsp)
        je      error_kernelspace
error_swapgs:
        swapgs
error_sti:
        movq    %rdi,RDI(%rsp)
        movq    %rsp,%rdi
        movq    ORIG_RAX(%rsp),%rsi     /* get error code */
        movq    $-1,ORIG_RAX(%rsp)
        call    *%rax
        /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
        movl    %ebx,%eax
        RESTORE_REST
        cli
        GET_THREAD_INFO(%rcx)
        testl   %eax,%eax
        jne     retint_kernel
        movl    threadinfo_flags(%rcx),%edx
        movl    $_TIF_WORK_MASK,%edi
        andl    %edi,%edx
        jnz     retint_careful
        swapgs
        RESTORE_ARGS 0,8,0
        iretq
        CFI_ENDPROC
error_kernelspace:
        incl    %ebx
        /*
         * There are two places in the kernel that can potentially fault with
         * usergs. Handle them here. The exception handlers after iret run
         * with kernel gs again, so don't set the user space flag.
         * B-stepping K8s sometimes report a truncated RIP for IRET
         * exceptions returning to compat mode. Check for these here too.
         */
        leaq    iret_label(%rip),%rbp
        cmpq    %rbp,RIP(%rsp)
        je      error_swapgs
        movl    %ebp,%ebp       /* zero extend */
        cmpq    %rbp,RIP(%rsp)
        je      error_swapgs
        cmpq    $gs_change,RIP(%rsp)
        je      error_swapgs
        jmp     error_sti
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(load_gs_index)
        pushf
        cli
        swapgs
gs_change:
        movl    %edi,%gs
2:      mfence          /* workaround */
        swapgs
        popf
        ret

        .section __ex_table,"a"
        .align 8
        .quad gs_change,bad_gs
        .previous
        .section .fixup,"ax"
        /* running with kernelgs */
bad_gs:
        swapgs          /* switch back to user gs */
        xorl    %eax,%eax
        movl    %eax,%gs
        jmp     2b
        .previous
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *      extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 *
 * asm input arguments:
 *      rdi: fn, rsi: arg, rdx: flags
 */
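/*
 * How this works: FAKE_STACK_FRAME plus SAVE_ALL build a complete pt_regs
 * whose saved rip is child_rip and whose saved rdi/rsi still hold fn and
 * arg. do_fork() copies this frame for the child, which therefore wakes
 * up in child_rip with fn in %rdi and arg in %rsi, exactly as the parent
 * left them.
 */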
ENTRY(kernel_thread)
        CFI_STARTPROC
        FAKE_STACK_FRAME $child_rip
        SAVE_ALL

        # rdi: flags, rsi: usp, rdx: will be &pt_regs
        movq    %rdx,%rdi
        orq     kernel_thread_flags(%rip),%rdi
        movq    $-1, %rsi
        movq    %rsp, %rdx

        xorl    %r8d,%r8d
        xorl    %r9d,%r9d

        # clone now
        call    do_fork
        movq    %rax,RAX(%rsp)
        xorl    %edi,%edi

        /*
         * It isn't worth checking for a reschedule here, so internally to
         * the x86_64 port you can rely on kernel_thread() not rescheduling
         * the child before returning; this avoids the need for hacks, for
         * example to fork off the per-CPU idle tasks.
         * [Hopefully no generic code relies on the reschedule -AK]
         */
        RESTORE_ALL
        UNFAKE_STACK_FRAME
        ret
        CFI_ENDPROC

child_rip:
        /*
         * Here we are in the child and the registers are set as they were
         * at kernel_thread() invocation in the parent.
         */
        movq    %rdi, %rax
        movq    %rsi, %rdi
        call    *%rax
        # exit
        xorq    %rdi, %rdi
        call    do_exit
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all
 * state properly.
 *
 * C extern interface:
 *      extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *      rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *      extern long sys_execve(char *name, char **argv, char **envp,
 *                             struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *      rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
        CFI_STARTPROC
        FAKE_STACK_FRAME $0
        SAVE_ALL
        call    sys_execve
        movq    %rax, RAX(%rsp)
        RESTORE_REST
        testq   %rax,%rax
        je      int_ret_from_sys_call
        RESTORE_ARGS
        UNFAKE_STACK_FRAME
        ret
        CFI_ENDPROC
ENTRY(page_fault)
        errorentry do_page_fault

ENTRY(coprocessor_error)
        zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
        zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
        zeroentry math_state_restore
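/*
 * "Runs on exception stack" below means the IDT entry for the vector uses
 * an IST slot in the TSS, so the CPU unconditionally switches to a
 * dedicated per-CPU stack on entry. This is what makes it safe to take
 * these exceptions even when the kernel stack pointer is not valid.
 */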
/* runs on exception stack */
ENTRY(debug)
        CFI_STARTPROC
        pushq   $0
        CFI_ADJUST_CFA_OFFSET   8
        paranoidentry do_debug
        /* switch back to process stack to restore the state ptrace touched */
        movq    %rax,%rsp
        testl   $3,CS(%rsp)
        jnz     paranoid_userspace
        jmp     paranoid_exit
        CFI_ENDPROC

/* runs on exception stack */
ENTRY(nmi)
        CFI_STARTPROC
        pushq   $-1
        CFI_ADJUST_CFA_OFFSET   8
        paranoidentry do_nmi
        /* ebx: no swapgs flag */
paranoid_exit:
        testl   %ebx,%ebx       /* swapgs needed? */
        jnz     paranoid_restore
paranoid_swapgs:
        cli
        swapgs
paranoid_restore:
        RESTORE_ALL 8
        iretq
paranoid_userspace:
        cli
        GET_THREAD_INFO(%rcx)
        movl    threadinfo_flags(%rcx),%edx
        testl   $_TIF_NEED_RESCHED,%edx
        jnz     paranoid_resched
        testl   $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        jnz     paranoid_signal
        jmp     paranoid_swapgs
paranoid_resched:
        sti
        call    schedule
        jmp     paranoid_exit
paranoid_signal:
        sti
        xorl    %esi,%esi       /* oldset */
        movq    %rsp,%rdi       /* &pt_regs */
        call    do_notify_resume
        jmp     paranoid_exit
        CFI_ENDPROC

ENTRY(int3)
        zeroentry do_int3

ENTRY(overflow)
        zeroentry do_overflow

ENTRY(bounds)
        zeroentry do_bounds

ENTRY(invalid_op)
        zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
        zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
        zeroentry do_reserved

/* runs on exception stack */
ENTRY(double_fault)
        CFI_STARTPROC
        paranoidentry do_double_fault
        movq    %rax,%rsp
        testl   $3,CS(%rsp)
        jnz     paranoid_userspace
        jmp     paranoid_exit
        CFI_ENDPROC

ENTRY(invalid_TSS)
        errorentry do_invalid_TSS

ENTRY(segment_not_present)
        errorentry do_segment_not_present

/* runs on exception stack */
ENTRY(stack_segment)
        CFI_STARTPROC
        paranoidentry do_stack_segment
        movq    %rax,%rsp
        testl   $3,CS(%rsp)
        jnz     paranoid_userspace
        jmp     paranoid_exit
        CFI_ENDPROC

ENTRY(general_protection)
        errorentry do_general_protection

ENTRY(alignment_check)
        errorentry do_alignment_check

ENTRY(divide_error)
        zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
        zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
/* runs on exception stack */
ENTRY(machine_check)
        CFI_STARTPROC
        pushq   $0
        CFI_ADJUST_CFA_OFFSET   8
        paranoidentry do_machine_check
        jmp     paranoid_exit
        CFI_ENDPROC
#endif

ENTRY(call_debug)
        zeroentry do_call_debug