/*
* linux/arch/x86_64/entry.S
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
*
* $Id$
*/
/*
* entry.S contains the system-call and fault low-level handling routines.
*
* NOTE: This code handles signal recognition, which happens every time
* after an interrupt and after each system call.
*
* Normal syscalls and interrupts don't save a full stack frame; this is
* only done for syscall tracing, signals or fork/exec et al.
*
* A note on terminology:
* - top of stack: Architecture defined interrupt frame from SS to RIP
* at the top of the kernel process stack.
* - partial stack frame: partially saved registers up to R11.
* - full stack frame: Like partial stack frame, but all registers saved.
*
* TODO:
* - schedule it carefully for the final hardware.
*/
#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
.code64
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
/*
* C code is not supposed to know about the undefined top of stack. Every time
* a C function with a pt_regs argument is called from the SYSCALL based
* fast path FIXUP_TOP_OF_STACK is needed.
* RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
* manipulation.
*/
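/*
* Background: the SYSCALL instruction saves the user return RIP in %rcx and
* RFLAGS in %r11, and leaves the user RSP untouched (system_call below stashes
* it in %gs:pda_oldrsp). FIXUP_TOP_OF_STACK rebuilds a complete iret-style
* frame from those pieces so that C code sees a normal struct pt_regs; the
* RCX slot is marked clobbered (-1) because %rcx held the return address, not
* a real user register value.
*/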
/* %rsp:at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
movq %gs:pda_oldrsp,\tmp
movq \tmp,RSP(%rsp)
movq $__USER_DS,SS(%rsp)
movq $__USER_CS,CS(%rsp)
movq $-1,RCX(%rsp)
movq R11(%rsp),\tmp /* get eflags */
movq \tmp,EFLAGS(%rsp)
.endm
.macro RESTORE_TOP_OF_STACK tmp,offset=0
movq RSP-\offset(%rsp),\tmp
movq \tmp,%gs:pda_oldrsp
movq EFLAGS-\offset(%rsp),\tmp
movq \tmp,R11-\offset(%rsp)
.endm
.macro FAKE_STACK_FRAME child_rip
/* push in order ss, rsp, eflags, cs, rip */
xorl %eax, %eax
pushq %rax /* ss */
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET ss,0*/
pushq %rax /* rsp */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rsp,0
pushq $(1<<9) /* eflags - interrupts on */
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET rflags,0*/
pushq $__KERNEL_CS /* cs */
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET cs,0*/
pushq \child_rip /* rip */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rip,0
pushq %rax /* orig rax */
CFI_ADJUST_CFA_OFFSET 8
.endm
.macro UNFAKE_STACK_FRAME
addq $8*6, %rsp
CFI_ADJUST_CFA_OFFSET -(6*8)
.endm
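/*
* Note: FAKE_STACK_FRAME pushes six quadwords (ss, rsp, eflags, cs, rip and a
* zero orig_rax), mimicking the frame an interrupt would have built; that is
* why UNFAKE_STACK_FRAME simply drops 6*8 bytes again.
*/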
.macro CFI_DEFAULT_STACK start=1
.if \start
CFI_STARTPROC simple
CFI_DEF_CFA rsp,SS+8
.else
CFI_DEF_CFA_OFFSET SS+8
.endif
CFI_REL_OFFSET r15,R15
CFI_REL_OFFSET r14,R14
CFI_REL_OFFSET r13,R13
CFI_REL_OFFSET r12,R12
CFI_REL_OFFSET rbp,RBP
CFI_REL_OFFSET rbx,RBX
CFI_REL_OFFSET r11,R11
CFI_REL_OFFSET r10,R10
CFI_REL_OFFSET r9,R9
CFI_REL_OFFSET r8,R8
CFI_REL_OFFSET rax,RAX
CFI_REL_OFFSET rcx,RCX
CFI_REL_OFFSET rdx,RDX
CFI_REL_OFFSET rsi,RSI
CFI_REL_OFFSET rdi,RDI
CFI_REL_OFFSET rip,RIP
/*CFI_REL_OFFSET cs,CS*/
/*CFI_REL_OFFSET rflags,EFLAGS*/
CFI_REL_OFFSET rsp,RSP
/*CFI_REL_OFFSET ss,SS*/
.endm
/*
* A newly forked process directly context switches into this.
*/
/* rdi: prev */
ENTRY(ret_from_fork)
CFI_DEFAULT_STACK
call schedule_tail
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
jnz rff_trace
rff_action:
RESTORE_REST
testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
je int_ret_from_sys_call
testl $_TIF_IA32,threadinfo_flags(%rcx)
jnz int_ret_from_sys_call
RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
jmp ret_from_sys_call
rff_trace:
movq %rsp,%rdi
call syscall_trace_leave
GET_THREAD_INFO(%rcx)
jmp rff_action
CFI_ENDPROC
END(ret_from_fork)
/*
* System call entry. Up to 6 arguments in registers are supported.
*
* SYSCALL does not save anything on the stack and does not change the
* stack pointer.
*/
/*
* Register setup:
* rax system call number
* rdi arg0
* rcx return address for syscall/sysret, C arg3
* rsi arg1
* rdx arg2
* r10 arg3 (--> moved to rcx for C)
* r8 arg4
* r9 arg5
* r11 eflags for syscall/sysret, temporary for C
* r12-r15,rbp,rbx saved by C code, not touched.
*
* Interrupts are off on entry.
* Only called from user space.
*
* XXX if we had a free scratch register we could save the RSP into the stack frame
* and report it properly in ps. Unfortunately we don't have one.
*
* When the user can change the frame, always force IRET. That is because
* it deals with uncanonical addresses better. SYSRET has trouble
* with them due to bugs in both AMD and Intel CPUs.
*/
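/*
* Illustrative example: a user-space write(fd, buf, count) arrives here with
* rax = __NR_write, rdi = fd, rsi = buf, rdx = count, while rcx and r11 hold
* the user return RIP and RFLAGS saved by the SYSCALL instruction.
*/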
ENTRY(system_call)
CFI_STARTPROC simple
CFI_DEF_CFA rsp,0
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
swapgs
movq %rsp,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
sti
SAVE_ARGS 8,1
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
CFI_REMEMBER_STATE
jnz tracesys
cmpq $__NR_syscall_max,%rax
ja badsys
movq %r10,%rcx
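/* sys_call_table is an array of 8-byte function pointers, indexed by the
   syscall number in %rax (already range-checked against __NR_syscall_max) */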
call *sys_call_table(,%rax,8) # XXX: rip relative
movq %rax,RAX-ARGOFFSET(%rsp)
/*
* Syscall return path ending with SYSRET (fast path)
* Has incomplete stack frame and undefined top of stack.
*/
.globl ret_from_sys_call
ret_from_sys_call:
movl $_TIF_ALLWORK_MASK,%edi
/* edi: flagmask */
sysret_check:
GET_THREAD_INFO(%rcx)
cli
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
CFI_REMEMBER_STATE
jnz sysret_careful
movq RIP-ARGOFFSET(%rsp),%rcx
CFI_REGISTER rip,rcx
RESTORE_ARGS 0,-ARG_SKIP,1
/*CFI_REGISTER rflags,r11*/
movq %gs:pda_oldrsp,%rsp
swapgs
sysretq
/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
sti
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
jmp sysret_check
/* Handle a signal */
sysret_signal:
sti
testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
jz 1f
/* Really a signal */
/* edx: work flags (arg3) */
leaq do_notify_resume(%rip),%rax
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
xorl %esi,%esi # oldset -> arg2
call ptregscall_common
1: movl $_TIF_NEED_RESCHED,%edi
/* Use IRET because user could have changed frame. This
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
cli
jmp int_with_check
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
jmp ret_from_sys_call
/* Do syscall tracing */
tracesys:
CFI_RESTORE_STATE
SAVE_REST
movq $-ENOSYS,RAX(%rsp)
FIXUP_TOP_OF_STACK %rdi
movq %rsp,%rdi
call syscall_trace_enter
LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed them */
RESTORE_REST
cmpq $__NR_syscall_max,%rax
ja 1f
movq %r10,%rcx /* fixup for C */
call *sys_call_table(,%rax,8)
1: movq %rax,RAX-ARGOFFSET(%rsp)
/* Use IRET because user could have changed frame */
jmp int_ret_from_sys_call
CFI_ENDPROC
END(system_call)
/*
* Syscall return path ending with IRET.
* Has correct top of stack, but partial stack frame.
*/
ENTRY(int_ret_from_sys_call)
CFI_STARTPROC simple
CFI_DEF_CFA rsp,SS+8-ARGOFFSET
/*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
CFI_REL_OFFSET rsp,RSP-ARGOFFSET
/*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
/*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
CFI_REL_OFFSET rip,RIP-ARGOFFSET
CFI_REL_OFFSET rdx,RDX-ARGOFFSET
CFI_REL_OFFSET rcx,RCX-ARGOFFSET
CFI_REL_OFFSET rax,RAX-ARGOFFSET
CFI_REL_OFFSET rdi,RDI-ARGOFFSET
CFI_REL_OFFSET rsi,RSI-ARGOFFSET
CFI_REL_OFFSET r8,R8-ARGOFFSET
CFI_REL_OFFSET r9,R9-ARGOFFSET
CFI_REL_OFFSET r10,R10-ARGOFFSET
CFI_REL_OFFSET r11,R11-ARGOFFSET
cli
testl $3,CS-ARGOFFSET(%rsp)
je retint_restore_args
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
int_with_check:
GET_THREAD_INFO(%rcx)
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
jnz int_careful
andl $~TS_COMPAT,threadinfo_status(%rcx)
jmp retint_swapgs
/* Either reschedule or signal or syscall exit tracking needed. */
/* First do a reschedule test. */
/* edx: work, edi: workmask */
int_careful:
bt $TIF_NEED_RESCHED,%edx
jnc int_very_careful
sti
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
cli
jmp int_with_check
/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
sti
SAVE_REST
/* Check for syscall exit trace */
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
jz int_signal
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace_leave
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
cli
jmp int_restore_rest
int_signal:
testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
jz 1f
movq %rsp,%rdi # &ptregs -> arg1
xorl %esi,%esi # oldset -> arg2
call do_notify_resume
1: movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
RESTORE_REST
cli
jmp int_with_check
CFI_ENDPROC
END(int_ret_from_sys_call)
/*
* Certain special system calls that need to save a full stack frame.
*/
.macro PTREGSCALL label,func,arg
.globl \label
\label:
leaq \func(%rip),%rax
leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
jmp ptregscall_common
END(\label)
.endm
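/*
* Each stub loads its C handler into %rax and points the requested argument
* register at the (not yet complete) pt_regs area, then jumps to
* ptregscall_common, which saves the remaining registers and fixes up the top
* of stack before calling the handler.
*/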
CFI_STARTPROC
PTREGSCALL stub_clone, sys_clone, %r8
PTREGSCALL stub_fork, sys_fork, %rdi
PTREGSCALL stub_vfork, sys_vfork, %rdi
PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
PTREGSCALL stub_iopl, sys_iopl, %rsi
ENTRY(ptregscall_common)
popq %r11
CFI_ADJUST_CFA_OFFSET -8
CFI_REGISTER rip, r11
SAVE_REST
movq %r11, %r15
CFI_REGISTER rip, r15
FIXUP_TOP_OF_STACK %r11
call *%rax
RESTORE_TOP_OF_STACK %r11
movq %r15, %r11
CFI_REGISTER rip, r11
RESTORE_REST
pushq %r11
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rip, 0
ret
CFI_ENDPROC
END(ptregscall_common)
ENTRY(stub_execve)
CFI_STARTPROC
popq %r11
CFI_ADJUST_CFA_OFFSET -8
CFI_REGISTER rip, r11
SAVE_REST
FIXUP_TOP_OF_STACK %r11
call sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_REST
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_execve)
/*
* sigreturn is special because it needs to restore all registers on return.
* This cannot be done with SYSRET, so use the IRET return path instead.
*/
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
CFI_ADJUST_CFA_OFFSET -8
SAVE_REST
movq %rsp,%rdi
FIXUP_TOP_OF_STACK %r11
call sys_rt_sigreturn
movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
RESTORE_REST
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_rt_sigreturn)
/*
* initial frame state for interrupts and exceptions
*/
.macro _frame ref
CFI_STARTPROC simple
CFI_DEF_CFA rsp,SS+8-\ref
/*CFI_REL_OFFSET ss,SS-\ref*/
CFI_REL_OFFSET rsp,RSP-\ref
/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
/*CFI_REL_OFFSET cs,CS-\ref*/
CFI_REL_OFFSET rip,RIP-\ref
.endm
/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX
/*
* Interrupt entry/exit.
*
* Interrupt entry points save only callee clobbered registers in fast path.
*
* Entry runs with interrupts off.
*/
/* 0(%rsp): interrupt number */
.macro interrupt func
cld
SAVE_ARGS
leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
pushq %rbp
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rbp, 0
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
testl $3,CS(%rdi)
je 1f
swapgs
1: incl %gs:pda_irqcount # RED-PEN should check preempt count
cmoveq %gs:pda_irqstackptr,%rsp
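/* pda_irqcount starts at -1, so the incl above sets ZF only for the outermost
   interrupt; cmoveq then switches to the per-CPU irq stack exactly once, and
   nested interrupts keep running on the stack they arrived on */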
call \func
.endm
ENTRY(common_interrupt)
XCPT_FRAME
interrupt do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
cli
decl %gs:pda_irqcount
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
exit_intr:
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
je retint_kernel
/* Interrupt came from user space */
/*
* Has a correct top of stack, but a partial stack frame
* %rcx: thread info. Interrupts off.
*/
retint_with_reschedule:
movl $_TIF_WORK_MASK,%edi
retint_check:
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
CFI_REMEMBER_STATE
jnz retint_careful
retint_swapgs:
swapgs
retint_restore_args:
cli
RESTORE_ARGS 0,8,0
iret_label:
iretq
.section __ex_table,"a"
.quad iret_label,bad_iret
.previous
.section .fixup,"ax"
/* force a signal here? this matches i386 behaviour */
/* running with kernel gs */
bad_iret:
movq $11,%rdi /* SIGSEGV */
sti
jmp do_exit
.previous
/* edi: workmask, edx: work */
retint_careful:
CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc retint_signal
sti
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
GET_THREAD_INFO(%rcx)
cli
jmp retint_check
retint_signal:
testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
jz retint_swapgs
sti
SAVE_REST
movq $-1,ORIG_RAX(%rsp)
xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
RESTORE_REST
cli
movl $_TIF_NEED_RESCHED,%edi
GET_THREAD_INFO(%rcx)
jmp retint_check
#ifdef CONFIG_PREEMPT
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
.p2align
retint_kernel:
cmpl $0,threadinfo_preempt_count(%rcx)
jnz retint_restore_args
bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
jnc retint_restore_args
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
jnc retint_restore_args
call preempt_schedule_irq
jmp exit_intr
#endif
CFI_ENDPROC
END(common_interrupt)
/*
* APIC interrupts.
*/
.macro apicinterrupt num,func
INTR_FRAME
pushq $\num-256
CFI_ADJUST_CFA_OFFSET 8
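/* the vector is pushed biased by -256 so that orig_rax is negative for
   hardware interrupts and can never be mistaken for a syscall number */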
interrupt \func
jmp ret_from_intr
CFI_ENDPROC
.endm
ENTRY(thermal_interrupt)
apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)
ENTRY(threshold_interrupt)
apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)
#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)
.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
.endm
INVALIDATE_ENTRY 0
INVALIDATE_ENTRY 1
INVALIDATE_ENTRY 2
INVALIDATE_ENTRY 3
INVALIDATE_ENTRY 4
INVALIDATE_ENTRY 5
INVALIDATE_ENTRY 6
INVALIDATE_ENTRY 7
ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
#endif
#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)
ENTRY(error_interrupt)
apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)
ENTRY(spurious_interrupt)
apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
#endif
/*
* Exception entry points.
*/
.macro zeroentry sym
INTR_FRAME
pushq $0 /* push error code/oldrax */
CFI_ADJUST_CFA_OFFSET 8
pushq %rax /* push real oldrax to the rdi slot */
CFI_ADJUST_CFA_OFFSET 8
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
.endm
.macro errorentry sym
XCPT_FRAME
pushq %rax
CFI_ADJUST_CFA_OFFSET 8
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
.endm
/* error code is on the stack already */
/* handle NMI-like exceptions that can happen everywhere */
.macro paranoidentry sym, ist=0
SAVE_ALL
cld
movl $1,%ebx
movl $MSR_GS_BASE,%ecx
rdmsr
testl %edx,%edx
js 1f
swapgs
xorl %ebx,%ebx
1:
.if \ist
movq %gs:pda_data_offset, %rbp
.endif
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi
movq $-1,ORIG_RAX(%rsp)
.if \ist
subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
call \sym
.if \ist
addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
cli
.endm
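/*
* paranoidentry leaves a "no swapgs" flag in %ebx: 1 means the kernel GS base
* was already active on entry (nested from kernel context) so it must not be
* swapped again on exit, 0 means swapgs was executed here and paranoid_exit
* has to undo it before returning.
*/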
/*
* Exception entry point. This expects an error code/orig_rax on the stack
* and the exception handler in %rax.
*/
ENTRY(error_entry)
_frame RDI
/* rdi slot contains rax, oldrax contains error code */
cld
subq $14*8,%rsp
CFI_ADJUST_CFA_OFFSET (14*8)
movq %rsi,13*8(%rsp)
CFI_REL_OFFSET rsi,RSI
movq 14*8(%rsp),%rsi /* load rax from rdi slot */
movq %rdx,12*8(%rsp)
CFI_REL_OFFSET rdx,RDX
movq %rcx,11*8(%rsp)
CFI_REL_OFFSET rcx,RCX
movq %rsi,10*8(%rsp) /* store rax */
CFI_REL_OFFSET rax,RAX
movq %r8, 9*8(%rsp)
CFI_REL_OFFSET r8,R8
movq %r9, 8*8(%rsp)
CFI_REL_OFFSET r9,R9
movq %r10,7*8(%rsp)
CFI_REL_OFFSET r10,R10
movq %r11,6*8(%rsp)
CFI_REL_OFFSET r11,R11
movq %rbx,5*8(%rsp)
CFI_REL_OFFSET rbx,RBX
movq %rbp,4*8(%rsp)
CFI_REL_OFFSET rbp,RBP
movq %r12,3*8(%rsp)
CFI_REL_OFFSET r12,R12
movq %r13,2*8(%rsp)
CFI_REL_OFFSET r13,R13
movq %r14,1*8(%rsp)
CFI_REL_OFFSET r14,R14
movq %r15,(%rsp)
CFI_REL_OFFSET r15,R15
xorl %ebx,%ebx
testl $3,CS(%rsp)
je error_kernelspace
error_swapgs:
swapgs
error_sti:
movq %rdi,RDI(%rsp)
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp)
call *%rax
/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
movl %ebx,%eax
RESTORE_REST
cli
GET_THREAD_INFO(%rcx)
testl %eax,%eax
jne retint_kernel
movl threadinfo_flags(%rcx),%edx
movl $_TIF_WORK_MASK,%edi
andl %edi,%edx
jnz retint_careful
swapgs
RESTORE_ARGS 0,8,0
jmp iret_label
CFI_ENDPROC
error_kernelspace:
incl %ebx
/* There are two places in the kernel that can potentially fault with
usergs. Handle them here. The exception handlers after
iret run with kernel gs again, so don't set the user space flag.
B stepping K8s sometimes report a truncated RIP for IRET
exceptions returning to compat mode. Check for these here too. */
leaq iret_label(%rip),%rbp
cmpq %rbp,RIP(%rsp)
je error_swapgs
movl %ebp,%ebp /* zero extend */
cmpq %rbp,RIP(%rsp)
je error_swapgs
cmpq $gs_change,RIP(%rsp)
je error_swapgs
jmp error_sti
END(error_entry)
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(load_gs_index)
CFI_STARTPROC
pushf
CFI_ADJUST_CFA_OFFSET 8
cli
swapgs
gs_change:
movl %edi,%gs
2: mfence /* workaround */
swapgs
popf
CFI_ADJUST_CFA_OFFSET -8
ret
CFI_ENDPROC
ENDPROC(load_gs_index)
.section __ex_table,"a"
.align 8
.quad gs_change,bad_gs
.previous
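/*
* The __ex_table entry above pairs the potentially faulting instruction
* (gs_change) with its fixup (bad_gs below): if loading the new %gs selector
* faults, the exception handler resumes at bad_gs, which falls back to a
* null selector.
*/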
.section .fixup,"ax"
/* running with kernelgs */
bad_gs:
swapgs /* switch back to user gs */
xorl %eax,%eax
movl %eax,%gs
jmp 2b
.previous
/*
* Create a kernel thread.
*
* C extern interface:
* extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
*
* asm input arguments:
* rdi: fn, rsi: arg, rdx: flags
*/
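/*
* Purely illustrative (hypothetical) C call site; my_thread_fn is a made-up
* name, and the flags shown are just one plausible combination:
*
*	static int my_thread_fn(void *arg) { ...; return 0; }
*	kernel_thread(my_thread_fn, NULL, CLONE_FS | CLONE_FILES | SIGCHLD);
*/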
ENTRY(kernel_thread)
CFI_STARTPROC
FAKE_STACK_FRAME $child_rip
SAVE_ALL
# rdi: flags, rsi: usp, rdx: will be &pt_regs
movq %rdx,%rdi
orq kernel_thread_flags(%rip),%rdi
movq $-1, %rsi
movq %rsp, %rdx
xorl %r8d,%r8d
xorl %r9d,%r9d
# clone now
call do_fork
movq %rax,RAX(%rsp)
xorl %edi,%edi
/*
* It isn't worth checking for a reschedule here,
* so internally to the x86_64 port you can rely on kernel_thread()
* not rescheduling the child before returning; this avoids the need
* for hacks, for example to fork off the per-CPU idle tasks.
* [Hopefully no generic code relies on the reschedule -AK]
*/
RESTORE_ALL
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
ENDPROC(kernel_thread)
child_rip:
/*
* Here we are in the child and the registers are set as they were
* at kernel_thread() invocation in the parent.
*/
movq %rdi, %rax
movq %rsi, %rdi
call *%rax
# exit
xorl %edi, %edi
call do_exit
ENDPROC(child_rip)
/*
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
*
* C extern interface:
* extern long execve(char *name, char **argv, char **envp)
*
* asm input arguments:
* rdi: name, rsi: argv, rdx: envp
*
* We want to fall back into:
* extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
*
* do_sys_execve asm fallback arguments:
* rdi: name, rsi: argv, rdx: envp, fake frame on the stack
*/
ENTRY(execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
SAVE_ALL
call sys_execve
movq %rax, RAX(%rsp)
RESTORE_REST
testq %rax,%rax
je int_ret_from_sys_call
RESTORE_ARGS
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
ENDPROC(execve)
KPROBE_ENTRY(page_fault)
errorentry do_page_fault
END(page_fault)
.previous .text
ENTRY(coprocessor_error)
zeroentry do_coprocessor_error
END(coprocessor_error)
ENTRY(simd_coprocessor_error)
zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)
ENTRY(device_not_available)
zeroentry math_state_restore
END(device_not_available)
/* runs on exception stack */
KPROBE_ENTRY(debug)
INTR_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug, DEBUG_STACK
jmp paranoid_exit
CFI_ENDPROC
END(debug)
.previous .text
/* runs on exception stack */
KPROBE_ENTRY(nmi)
INTR_FRAME
pushq $-1
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_nmi
/*
* "Paranoid" exit path from exception stack.
* Paranoid because this is used by NMIs and cannot take
* any kernel state for granted.
* We don't do kernel preemption checks here, because only
* NMI should be common and it does not enable IRQs and
* cannot get reschedule ticks.
*/
/* ebx: no swapgs flag */
paranoid_exit:
testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore
testl $3,CS(%rsp)
jnz paranoid_userspace
paranoid_swapgs:
swapgs
paranoid_restore:
RESTORE_ALL 8
iretq
paranoid_userspace:
GET_THREAD_INFO(%rcx)
movl threadinfo_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
jz paranoid_swapgs
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */
testl $_TIF_NEED_RESCHED,%ebx
jnz paranoid_schedule
movl %ebx,%edx /* arg3: thread flags */
sti
xorl %esi,%esi /* arg2: oldset */
movq %rsp,%rdi /* arg1: &pt_regs */
call do_notify_resume
cli
jmp paranoid_userspace
paranoid_schedule:
sti
call schedule
cli
jmp paranoid_userspace
CFI_ENDPROC
END(nmi)
.previous .text
KPROBE_ENTRY(int3)
INTR_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_int3, DEBUG_STACK
jmp paranoid_exit
CFI_ENDPROC
END(int3)
.previous .text
ENTRY(overflow)
zeroentry do_overflow
END(overflow)
ENTRY(bounds)
zeroentry do_bounds
END(bounds)
ENTRY(invalid_op)
zeroentry do_invalid_op
END(invalid_op)
ENTRY(coprocessor_segment_overrun)
zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)
ENTRY(reserved)
zeroentry do_reserved
END(reserved)
/* runs on exception stack */
ENTRY(double_fault)
XCPT_FRAME
paranoidentry do_double_fault
jmp paranoid_exit
CFI_ENDPROC
END(double_fault)
ENTRY(invalid_TSS)
errorentry do_invalid_TSS
END(invalid_TSS)
ENTRY(segment_not_present)
errorentry do_segment_not_present
END(segment_not_present)
/* runs on exception stack */
ENTRY(stack_segment)
XCPT_FRAME
paranoidentry do_stack_segment
jmp paranoid_exit
CFI_ENDPROC
END(stack_segment)
KPROBE_ENTRY(general_protection)
errorentry do_general_protection
END(general_protection)
.previous .text
ENTRY(alignment_check)
errorentry do_alignment_check
END(alignment_check)
ENTRY(divide_error)
zeroentry do_divide_error
END(divide_error)
ENTRY(spurious_interrupt_bug)
zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)
#ifdef CONFIG_X86_MCE
/* runs on exception stack */
ENTRY(machine_check)
INTR_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_machine_check
jmp paranoid_exit
CFI_ENDPROC
END(machine_check)
#endif
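/*
* call_softirq runs __do_softirq on the per-CPU irq stack (switching only
* when not already on it, tracked via pda_irqcount) so that softirq work does
* not eat into the current task's kernel stack.
*/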
ENTRY(call_softirq)
CFI_STARTPROC
movq %gs:pda_irqstackptr,%rax
movq %rsp,%rdx
CFI_DEF_CFA_REGISTER rdx
incl %gs:pda_irqcount
cmove %rax,%rsp
pushq %rdx
/*todo CFI_DEF_CFA_EXPRESSION ...*/
call __do_softirq
popq %rsp
CFI_DEF_CFA_REGISTER rsp
decl %gs:pda_irqcount
ret
CFI_ENDPROC
ENDPROC(call_softirq)
#ifdef CONFIG_STACK_UNWIND
ENTRY(arch_unwind_init_running)
CFI_STARTPROC
movq %r15, R15(%rdi)
movq %r14, R14(%rdi)
xchgq %rsi, %rdx
movq %r13, R13(%rdi)
movq %r12, R12(%rdi)
xorl %eax, %eax
movq %rbp, RBP(%rdi)
movq %rbx, RBX(%rdi)
movq (%rsp), %rcx
movq %rax, R11(%rdi)
movq %rax, R10(%rdi)
movq %rax, R9(%rdi)
movq %rax, R8(%rdi)
movq %rax, RAX(%rdi)
movq %rax, RCX(%rdi)
movq %rax, RDX(%rdi)
movq %rax, RSI(%rdi)
movq %rax, RDI(%rdi)
movq %rax, ORIG_RAX(%rdi)
movq %rcx, RIP(%rdi)
leaq 8(%rsp), %rcx
movq $__KERNEL_CS, CS(%rdi)
movq %rax, EFLAGS(%rdi)
movq %rcx, RSP(%rdi)
movq $__KERNEL_DS, SS(%rdi)
jmpq *%rdx
CFI_ENDPROC
ENDPROC(arch_unwind_init_running)
#endif