/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens after every
 * interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals, or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like a partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL-based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
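
/*
 * A sketch of the fast-path frame (assuming the SYSCALL convention):
 * the CPU left the user RIP in %rcx and the user rflags in %r11, so the
 * partial frame written by SAVE_ARGS carries them in the RCX and R11
 * slots. FIXUP_TOP_OF_STACK synthesizes the SS/RSP/EFLAGS/CS/RIP words
 * an IRET-style frame would hold, and stores -1 in the RCX slot because
 * the user %rcx was already clobbered by SYSCALL itself.
 */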
/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
.endm
.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl	%eax, %eax
	pushq	%rax		/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
.endm

.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
.endm

.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the
 *	stack frame and report it properly in ps. Unfortunately we don't
 *	have one.
 */
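
/*
 * Illustrative only (not part of this file): a user-space write(2)
 * following the convention above would be set up roughly as
 *
 *	movq $1,%rax		# __NR_write
 *	movq $1,%rdi		# arg0: fd
 *	leaq buf(%rip),%rsi	# arg1: buffer (hypothetical symbol)
 *	movq $len,%rdx		# arg2: count (hypothetical constant)
 *	syscall			# clobbers rcx (RIP) and r11 (rflags)
 */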
ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,0
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	swapgs
	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	sti
	SAVE_ARGS 8,1
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  sysret_careful
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER rflags,r11*/
	movq	%gs:pda_oldrsp,%rsp
	swapgs
	sysretq

	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	sti
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    1f

	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	jmp sysret_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call
	/* Do syscall tracing */
tracesys:
	CFI_RESTORE_STATE
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed them */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja  1f
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
1:	SAVE_REST
	movq %rsp,%rdi
	call syscall_trace_leave
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	jmp ret_from_sys_call
	CFI_ENDPROC
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET	r8,R8-ARGOFFSET
	CFI_REL_OFFSET	r9,R9-ARGOFFSET
	CFI_REL_OFFSET	r10,R10-ARGOFFSET
	CFI_REL_OFFSET	r11,R11-ARGOFFSET
	cli
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz   int_careful
	jmp   retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc  int_very_careful
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	jmp int_with_check
	CFI_ENDPROC
/*
 * Certain special system calls need to save a complete full stack frame.
 */
.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq    -ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
.endm
	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	GET_THREAD_INFO(%rcx)
	bt $TIF_IA32,threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jc exec_32bit
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret

exec_32bit:
	CFI_RESTORE_STATE
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX
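
/*
 * For reference, the hardware-pushed frame both macros describe, from
 * higher to lower addresses (a sketch; real offsets come from asm-offsets):
 *
 *	SS
 *	RSP
 *	RFLAGS
 *	CS
 *	RIP		<- INTR_FRAME's reference point
 *	error code	<- present only for XCPT_FRAME
 */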
/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
	.macro interrupt func
	cld
#ifdef CONFIG_DEBUG_INFO
	SAVE_ALL
	movq %rsp,%rdi
	/*
	 * Set up a stack frame pointer. This allows gdb to trace
	 * back to the original stack.
	 */
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
#else
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
#endif
	testl $3,CS(%rdi)
	je 1f
	swapgs
	/* irqcount is -1 when not on the irq stack, so the first
	   increment yields zero and the cmove switches stacks. */
1:	incl	%gs:pda_irqcount	# RED-PEN should check preempt count
	movq %gs:pda_irqstackptr,%rax
	cmoveq %rax,%rsp		/*todo This needs CFI annotation! */
	pushq %rdi			# save old stack
	CFI_ADJUST_CFA_OFFSET	8
	call \func
	.endm
ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	cli
	decl %gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
	movq RBP(%rdi),%rbp
	CFI_DEF_CFA_REGISTER	rsp
#endif
	leaq ARGOFFSET(%rdi),%rsp	/*todo This needs CFI annotation! */
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  retint_careful
retint_swapgs:
	swapgs
retint_restore_args:
	cli
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq $-9999,%rdi	/* better code? */
	jmp do_exit
	.previous
	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt    $TIF_NEED_RESCHED,%edx
	jnc   retint_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET	8
	call  schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	cli
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    retint_swapgs
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	cli
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz  retint_restore_args
	bt  $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif
	CFI_ENDPROC
/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq $\num-256
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif
/*
 * Exception entry points.
 */
	.macro zeroentry sym
	INTR_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	.macro errorentry sym
	XCPT_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	/* error code is on the stack already */
	/* handle NMI-like exceptions that can happen everywhere */
#ifndef DEBUG_IST
# define DEBUG_IST 0
#endif
	.macro paranoidentry sym, ist=0
	SAVE_ALL
	cld
	movl $1,%ebx
	movl  $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx		/* negative -> kernel GS base already set */
	js    1f
	swapgs
	xorl  %ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq	$EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
	.endm
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	_frame RDI
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq  $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je  error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	cli
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne  retint_kernel
	movl  threadinfo_flags(%rcx),%edx
	movl  $_TIF_WORK_MASK,%edi
	andl  %edi,%edx
	jnz   retint_careful
	swapgs
	RESTORE_ARGS 0,8,0
	jmp iret_label
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after iret run
	   with kernel gs again, so don't set the user space flag. B-stepping
	   K8s sometimes report a truncated RIP for IRET exceptions returning
	   to compat mode. Check for these here too. */
	leaq iret_label(%rip),%rbp
	cmpq %rbp,RIP(%rsp)
	je   error_swapgs
	movl %ebp,%ebp	/* zero extend */
	cmpq %rbp,RIP(%rsp)
	je   error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je   error_swapgs
	jmp error_sti
	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp  2b
	.previous
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
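
/*
 * Illustrative C-level call (a sketch; real callers live elsewhere in
 * the tree, and the flags shown are hypothetical):
 *	kernel_thread(fn, arg, CLONE_FS | CLONE_FILES | SIGCHLD);
 * which arrives here with rdi=fn, rsi=arg, rdx=flags per the C ABI.
 */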
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here, so internally to the
	 * x86_64 port you can rely on kernel_thread() not rescheduling the
	 * child before returning; this avoids the need for hacks, for example
	 * to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC

child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorl %edi, %edi
	call do_exit
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state
 * properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
	.previous .text

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	zeroentry math_state_restore

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_IST
	jmp paranoid_exit
	CFI_ENDPROC
	.previous .text
	/* runs on exception stack */
ENTRY(nmi)
	INTR_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi
	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 */
	/* ebx:	no swapgs flag */
paranoid_exit:
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)
	jnz   paranoid_userspace
paranoid_swapgs:
	swapgs
paranoid_restore:
	RESTORE_ALL 8
	iretq
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	sti
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	cli
	jmp paranoid_userspace
paranoid_schedule:
	sti
	call schedule
	cli
	jmp paranoid_userspace
	CFI_ENDPROC
KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_IST
	jmp paranoid_exit
	CFI_ENDPROC
	.previous .text

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit
	CFI_ENDPROC

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit
	CFI_ENDPROC

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
	.previous .text

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit
	CFI_ENDPROC
#endif
ENTRY(call_debug)
	zeroentry do_call_debug

ENTRY(call_softirq)
	CFI_STARTPROC
	movq %gs:pda_irqstackptr,%rax
	movq %rsp,%rdx
	CFI_DEF_CFA_REGISTER	rdx
	incl %gs:pda_irqcount
	cmove %rax,%rsp
	pushq %rdx
	/*todo CFI_DEF_CFA_EXPRESSION ...*/
	call __do_softirq
	popq %rsp
	CFI_DEF_CFA_REGISTER	rsp
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC