/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like a partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */

#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL-based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp:	at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm
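
/*
 * Background note (added for clarity, not in the original source): on the
 * SYSCALL fast path only a partial frame is saved, so the RSP/SS/CS/EFLAGS
 * slots of pt_regs are stale.  FIXUP_TOP_OF_STACK fills them in from
 * pda_oldrsp and R11 (SYSCALL stashes user RFLAGS in r11), and stores $-1
 * in the RCX slot because user rcx was clobbered by SYSCALL itself (it
 * holds the return RIP).  RESTORE_TOP_OF_STACK copies the values back so
 * SYSRET still works if C code edited the saved frame.
 */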

	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl	%eax, %eax
	pushq	%rax		/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm
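
/*
 * Illustrative note (added, not in the original source): FAKE_STACK_FRAME
 * builds the six quadwords an interrupt would have left behind -- ss, rsp,
 * eflags (IF set via bit 9), cs, rip and orig_rax -- so that code expecting
 * a pt_regs-shaped frame (e.g. do_fork called from kernel_thread below) can
 * run as if it had been entered from an interrupt.  UNFAKE_STACK_FRAME
 * simply pops those 6*8 bytes again.
 */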

	.macro	CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
	.endm

/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15, rbp, rbx are saved by C code and not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save RSP into the stack
 *	frame and report it properly in ps. Unfortunately we don't have one.
 *
 * When the user can change the frames, always force IRET: it deals with
 * non-canonical addresses better, while SYSRET has trouble with them due
 * to bugs in both AMD and Intel CPUs.
 */
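
/*
 * Worked example (added for illustration, not in the original source):
 * a user-space write(1, buf, 16) executes SYSCALL with rax=1 (__NR_write),
 * rdi=1, rsi=buf, rdx=16; r10/r8/r9 are don't-cares for a 3-argument call.
 * The CPU places the user return RIP in rcx and user RFLAGS in r11, which
 * is why this path must move r10 into rcx before calling the C handler and
 * why rcx/r11 cannot be used to pass arguments.
 */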

ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,0
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	swapgs
	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	sti
	SAVE_ARGS 8,1
	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq	%rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz sysret_careful
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER rflags,r11*/
	movq	%gs:pda_oldrsp,%rsp
	swapgs
	sysretq
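
/*
 * Note (added for clarity): SYSRETQ reloads RIP from %rcx and RFLAGS from
 * %r11 and returns to ring 3, which is why the saved RIP is pulled back
 * into %rcx above, and why this fast path may only be taken when no work
 * flags are set -- anything that may have modified the saved frame
 * (signals, ptrace) must go through the IRET return instead.
 */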

	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	sti
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz 1f

	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	cli
	jmp int_with_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	CFI_RESTORE_STATE
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja 1f
	movq %r10,%rcx		/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
1:	SAVE_REST
	movq %rsp,%rdi
	call syscall_trace_leave
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	/* Use IRET because user could have changed frame */
	jmp int_ret_from_sys_call
	CFI_ENDPROC

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET	r8,R8-ARGOFFSET
	CFI_REL_OFFSET	r9,R9-ARGOFFSET
	CFI_REL_OFFSET	r10,R10-ARGOFFSET
	CFI_REL_OFFSET	r11,R11-ARGOFFSET
	cli
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz int_careful
	andl $~TS_COMPAT,threadinfo_status(%rcx)
	jmp retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc int_very_careful
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	jmp int_with_check
	CFI_ENDPROC

/*
 * Certain special system calls need to save a complete full stack frame.
 */
	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
	.endm
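
/*
 * Example (added for illustration): "PTREGSCALL stub_clone, sys_clone, %r8"
 * expands to a stub_clone label that loads &sys_clone into %rax, points the
 * chosen argument register (%r8, the fifth C argument) at the pt_regs frame
 * on the stack, and jumps to ptregscall_common, which saves the remaining
 * registers and fixes up the top of stack before making the indirect call.
 */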

	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC

/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
	.macro interrupt func
	cld
#ifdef CONFIG_DEBUG_INFO
	SAVE_ALL
	movq %rsp,%rdi
	/*
	 * Set up a stack frame pointer.  This allows gdb to trace
	 * back to the original stack.
	 */
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
#else
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
#endif
	testl $3,CS(%rdi)
	je 1f
	swapgs
1:	incl %gs:pda_irqcount		# RED-PEN should check preempt count
	movq %gs:pda_irqstackptr,%rax
	cmoveq %rax,%rsp		/*todo This needs CFI annotation! */
	pushq %rdi			# save old stack
#ifndef CONFIG_DEBUG_INFO
	CFI_ADJUST_CFA_OFFSET	8
#endif
	call \func
	.endm
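
/*
 * Note (added for clarity): pda_irqcount is -1 when no interrupt stack is
 * in use.  The incl sets ZF exactly when the count becomes zero, i.e. on
 * the outermost nesting level, so cmoveq switches %rsp to the per-CPU IRQ
 * stack only then; nested interrupts stay on the stack they arrived on.
 * The old stack pointer (in %rdi) is pushed so ret_from_intr can switch
 * back afterwards.
 */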

ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	popq %rdi
#ifndef CONFIG_DEBUG_INFO
	CFI_ADJUST_CFA_OFFSET	-8
#endif
	cli
	decl %gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
	movq RBP(%rdi),%rbp
	CFI_DEF_CFA_REGISTER	rsp
#endif
	leaq ARGOFFSET(%rdi),%rsp	/*todo This needs CFI annotation! */
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz retint_careful
retint_swapgs:
	swapgs
retint_restore_args:
	cli
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq $11,%rdi	/* SIGSEGV */
	sti
	jmp do_exit
	.previous
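
/*
 * Note (added for clarity): the __ex_table entry above pairs the faulting
 * address (iret_label) with a fixup address (bad_iret).  If the iretq
 * itself faults -- e.g. because user space set up a bogus segment -- the
 * fault handler finds this entry and resumes at bad_iret, which exits the
 * task with SIGSEGV's number instead of recursing on a broken frame.
 */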

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc retint_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET	8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	cli
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz retint_swapgs
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	cli
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz retint_restore_args
	bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc retint_restore_args
	bt $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif
	CFI_ENDPROC

/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq $\num-256
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif

/*
 * Exception entry points.
 */
	.macro zeroentry sym
	INTR_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	.macro errorentry sym
	XCPT_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm
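
/*
 * Note (added for clarity): zeroentry is for exceptions that push no error
 * code (e.g. #DE, #UD); it pushes a literal 0 so the frame layout matches.
 * errorentry is for exceptions where the CPU already pushed an error code
 * (e.g. #GP, #PF).  Both then push the live %rax into the rdi slot, load
 * the C handler's address into %rax and funnel into error_entry below.
 */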

	/* error code is on the stack already */
	/* handle NMI-like exceptions that can happen everywhere */
	.macro paranoidentry sym, ist=0
	SAVE_ALL
	cld
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js 1f
	swapgs
	xorl %ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
	.endm
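
/*
 * Note (added for clarity): the rdmsr of MSR_GS_BASE checks whose GS base
 * is live.  A kernel GS base is a kernel-half (negative) address, so %edx
 * (the high 32 bits) has its sign bit set; in that case no swapgs is done
 * and %ebx stays 1.  Otherwise we arrived on the user GS base, swapgs is
 * executed and %ebx is cleared so the matching exit path knows to swap
 * back.  The optional \ist argument temporarily moves the IST slot down by
 * EXCEPTION_STKSZ so a nested exception of the same kind gets fresh stack.
 */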

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	_frame RDI
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	cli
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel
	movl threadinfo_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful
	swapgs
	RESTORE_ARGS 0,8,0
	jmp iret_label
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs.  Handle them here.  The exception handlers after iret run
	   with kernel gs again, so don't set the user-space flag.
	   B-stepping K8s sometimes report a truncated RIP for IRET exceptions
	   returning to compat mode.  Check for these here too. */
	leaq iret_label(%rip),%rbp
	cmpq %rbp,RIP(%rsp)
	je error_swapgs
	movl %ebp,%ebp	/* zero extend */
	cmpq %rbp,RIP(%rsp)
	je error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je error_swapgs
	jmp error_sti

	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp 2b
	.previous

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
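
/*
 * Usage sketch (added for illustration; the prototype above is the only
 * authoritative interface, and the flags shown are just an example):
 *
 *	static int worker(void *arg) { ... return 0; }
 *	kernel_thread(worker, NULL, CLONE_FS | CLONE_FILES | SIGCHLD);
 *
 * The fake frame built below makes do_fork behave as if it were entered
 * via an interrupt, and child_rip becomes the new thread's starting RIP.
 */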
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here, so internally to
	 * the x86_64 port you can rely on kernel_thread() not rescheduling
	 * the child before returning; this avoids the need for hacks, for
	 * example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC

child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorl %edi, %edi
	call do_exit

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC

KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
	.previous .text

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	zeroentry math_state_restore

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	jmp paranoid_exit
	CFI_ENDPROC
	.previous .text

	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi
	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 */
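	/*
	 * Note (added for clarity): swapgs here must follow the %ebx
	 * protocol set up by paranoidentry.  An NMI can fire while the
	 * kernel GS base is already live (e.g. between SYSCALL entry and
	 * its swapgs), so unconditionally swapping on exit would leave GS
	 * pointing the wrong way; hence the testl %ebx,%ebx below.
	 */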
	/* ebx:	no swapgs flag */
paranoid_exit:
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)
	jnz paranoid_userspace
paranoid_swapgs:
	swapgs
paranoid_restore:
	RESTORE_ALL 8
	iretq
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	sti
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	cli
	jmp paranoid_userspace
paranoid_schedule:
	sti
	call schedule
	cli
	jmp paranoid_userspace
	CFI_ENDPROC
	.previous .text

KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit
	CFI_ENDPROC
	.previous .text

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit
	CFI_ENDPROC

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit
	CFI_ENDPROC

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
	.previous .text

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit
	CFI_ENDPROC
#endif

ENTRY(call_softirq)
	CFI_STARTPROC
	movq %gs:pda_irqstackptr,%rax
	movq %rsp,%rdx
	CFI_DEF_CFA_REGISTER	rdx
	incl %gs:pda_irqcount
	cmove %rax,%rsp
	pushq %rdx
	/*todo CFI_DEF_CFA_EXPRESSION ...*/
	call __do_softirq
	popq %rsp
	CFI_DEF_CFA_REGISTER	rsp
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
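
/*
 * Note (added for clarity): call_softirq uses the same pda_irqcount trick
 * as the interrupt macro above -- incl makes the count zero only when we
 * are not already on the IRQ stack, and cmove then switches %rsp to
 * pda_irqstackptr for the duration of __do_softirq; the saved %rdx holds
 * the original stack pointer and is popped straight back into %rsp.
 */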