/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like a partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
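
/*
 * Editorial sketch (derived from the offsets used below, not part of the
 * original file): the pt_regs save area is laid out, from the lowest
 * address up, roughly as
 *
 *      r15 r14 r13 r12 rbp rbx r11 r10 r9 r8 rax rcx rdx rsi rdi orig_rax
 *
 * followed by the hardware frame rip, cs, eflags, rsp, ss.  A partial
 * frame covers rdi..r11 plus the hardware frame (SAVE_ARGS); SAVE_REST
 * and RESTORE_REST handle the remaining callee-saved registers.
 */
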
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>

        .code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
        bt $9,EFLAGS-\offset(%rsp)      /* interrupts off? */
        jnc 1f
        TRACE_IRQS_ON
1:
#endif
.endm

/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
        movq %gs:pda_oldrsp,\tmp
        movq \tmp,RSP(%rsp)
        movq $__USER_DS,SS(%rsp)
        movq $__USER_CS,CS(%rsp)
        movq $-1,RCX(%rsp)
        movq R11(%rsp),\tmp     /* get eflags */
        movq \tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
        movq RSP-\offset(%rsp),\tmp
        movq \tmp,%gs:pda_oldrsp
        movq EFLAGS-\offset(%rsp),\tmp
        movq \tmp,R11-\offset(%rsp)
.endm
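
/*
 * Editorial note (not part of the original file): the typical pairing,
 * visible in stub_execve below, is SAVE_REST + FIXUP_TOP_OF_STACK before
 * calling into C with a full pt_regs, and RESTORE_TOP_OF_STACK afterwards
 * so that a later sysret still finds a consistent saved rsp/eflags.
 */
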
.macro FAKE_STACK_FRAME child_rip
        /* push in order ss, rsp, eflags, cs, rip */
        xorl %eax, %eax
        pushq %rax              /* ss */
        CFI_ADJUST_CFA_OFFSET 8
        /*CFI_REL_OFFSET ss,0*/
        pushq %rax              /* rsp */
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rsp,0
        pushq $(1<<9)           /* eflags - interrupts on */
        CFI_ADJUST_CFA_OFFSET 8
        /*CFI_REL_OFFSET rflags,0*/
        pushq $__KERNEL_CS      /* cs */
        CFI_ADJUST_CFA_OFFSET 8
        /*CFI_REL_OFFSET cs,0*/
        pushq \child_rip        /* rip */
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rip,0
        pushq %rax              /* orig rax */
        CFI_ADJUST_CFA_OFFSET 8
.endm

.macro UNFAKE_STACK_FRAME
        addq $8*6, %rsp
        CFI_ADJUST_CFA_OFFSET -(6*8)
.endm

.macro CFI_DEFAULT_STACK start=1
        .if \start
        CFI_STARTPROC simple
        CFI_DEF_CFA rsp,SS+8
        .else
        CFI_DEF_CFA_OFFSET SS+8
        .endif
        CFI_REL_OFFSET r15,R15
        CFI_REL_OFFSET r14,R14
        CFI_REL_OFFSET r13,R13
        CFI_REL_OFFSET r12,R12
        CFI_REL_OFFSET rbp,RBP
        CFI_REL_OFFSET rbx,RBX
        CFI_REL_OFFSET r11,R11
        CFI_REL_OFFSET r10,R10
        CFI_REL_OFFSET r9,R9
        CFI_REL_OFFSET r8,R8
        CFI_REL_OFFSET rax,RAX
        CFI_REL_OFFSET rcx,RCX
        CFI_REL_OFFSET rdx,RDX
        CFI_REL_OFFSET rsi,RSI
        CFI_REL_OFFSET rdi,RDI
        CFI_REL_OFFSET rip,RIP
        /*CFI_REL_OFFSET cs,CS*/
        /*CFI_REL_OFFSET rflags,EFLAGS*/
        CFI_REL_OFFSET rsp,RSP
        /*CFI_REL_OFFSET ss,SS*/
.endm

/*
 * A newly forked process directly context switches into this.
 */
/* rdi: prev */
ENTRY(ret_from_fork)
        CFI_DEFAULT_STACK
        call schedule_tail
        GET_THREAD_INFO(%rcx)
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
        jnz rff_trace
rff_action:
        RESTORE_REST
        testl $3,CS-ARGOFFSET(%rsp)     # from kernel_thread?
        je int_ret_from_sys_call
        testl $_TIF_IA32,threadinfo_flags(%rcx)
        jnz int_ret_from_sys_call
        RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
        jmp ret_from_sys_call
rff_trace:
        movq %rsp,%rdi
        call syscall_trace_leave
        GET_THREAD_INFO(%rcx)
        jmp rff_action
        CFI_ENDPROC
END(ret_from_fork)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3 (--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX  if we had a free scratch register we could save the RSP into the stack frame
 *      and report it properly in ps. Unfortunately we haven't.
 *
 * When the user can change the frames, always force IRET. That is because
 * it deals with uncanonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
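
/*
 * Editorial illustration of the convention above (user-space side, not part
 * of the kernel build; buf/len/__NR_write stand in for real values):
 *
 *      movq $__NR_write,%rax   # system call number
 *      movq $1,%rdi            # arg0: fd
 *      leaq buf(%rip),%rsi     # arg1: buffer
 *      movq $len,%rdx          # arg2: count
 *      syscall                 # CPU puts return rip in rcx, rflags in r11
 */
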
ENTRY(system_call)
        CFI_STARTPROC simple
        CFI_DEF_CFA rsp,PDA_STACKOFFSET
        CFI_REGISTER rip,rcx
        /*CFI_REGISTER rflags,r11*/
        swapgs
        movq %rsp,%gs:pda_oldrsp
        movq %gs:pda_kernelstack,%rsp
        /*
         * No need to follow this irqs off/on section - it's straight
         * and short:
         */
        sti
        SAVE_ARGS 8,1
        movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
        movq %rcx,RIP-ARGOFFSET(%rsp)
        CFI_REL_OFFSET rip,RIP-ARGOFFSET
        GET_THREAD_INFO(%rcx)
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
        CFI_REMEMBER_STATE
        jnz tracesys
        cmpq $__NR_syscall_max,%rax
        ja badsys
        movq %r10,%rcx
        call *sys_call_table(,%rax,8)   # XXX: rip relative
        movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
        .globl ret_from_sys_call
ret_from_sys_call:
        movl $_TIF_ALLWORK_MASK,%edi
        /* edi: flagmask */
sysret_check:
        GET_THREAD_INFO(%rcx)
        cli
        TRACE_IRQS_OFF
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        CFI_REMEMBER_STATE
        jnz sysret_careful
        /*
         * sysretq will re-enable interrupts:
         */
        TRACE_IRQS_ON
        movq RIP-ARGOFFSET(%rsp),%rcx
        CFI_REGISTER rip,rcx
        RESTORE_ARGS 0,-ARG_SKIP,1
        /*CFI_REGISTER rflags,r11*/
        movq %gs:pda_oldrsp,%rsp
        swapgs
        sysretq

        /* Handle reschedules */
        /* edx: work, edi: workmask */
sysret_careful:
        CFI_RESTORE_STATE
        bt $TIF_NEED_RESCHED,%edx
        jnc sysret_signal
        TRACE_IRQS_ON
        sti
        pushq %rdi
        CFI_ADJUST_CFA_OFFSET 8
        call schedule
        popq %rdi
        CFI_ADJUST_CFA_OFFSET -8
        jmp sysret_check

        /* Handle a signal */
sysret_signal:
        TRACE_IRQS_ON
        sti
        testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        jz 1f
        /* Really a signal */
        /* edx: work flags (arg3) */
        leaq do_notify_resume(%rip),%rax
        leaq -ARGOFFSET(%rsp),%rdi      # &pt_regs -> arg1
        xorl %esi,%esi                  # oldset -> arg2
        call ptregscall_common
1:      movl $_TIF_NEED_RESCHED,%edi
        /* Use IRET because user could have changed frame. This
           works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
        cli
        TRACE_IRQS_OFF
        jmp int_with_check

badsys:
        movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
        jmp ret_from_sys_call

        /* Do syscall tracing */
tracesys:
        CFI_RESTORE_STATE
        SAVE_REST
        movq $-ENOSYS,RAX(%rsp)
        FIXUP_TOP_OF_STACK %rdi
        movq %rsp,%rdi
        call syscall_trace_enter
        LOAD_ARGS ARGOFFSET     /* reload args from stack in case ptrace changed it */
        RESTORE_REST
        cmpq $__NR_syscall_max,%rax
        ja 1f
        movq %r10,%rcx          /* fixup for C */
        call *sys_call_table(,%rax,8)
1:      movq %rax,RAX-ARGOFFSET(%rsp)
        /* Use IRET because user could have changed frame */
        jmp int_ret_from_sys_call
        CFI_ENDPROC
END(system_call)

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
        CFI_STARTPROC simple
        CFI_DEF_CFA rsp,SS+8-ARGOFFSET
        /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
        CFI_REL_OFFSET rsp,RSP-ARGOFFSET
        /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
        /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
        CFI_REL_OFFSET rip,RIP-ARGOFFSET
        CFI_REL_OFFSET rdx,RDX-ARGOFFSET
        CFI_REL_OFFSET rcx,RCX-ARGOFFSET
        CFI_REL_OFFSET rax,RAX-ARGOFFSET
        CFI_REL_OFFSET rdi,RDI-ARGOFFSET
        CFI_REL_OFFSET rsi,RSI-ARGOFFSET
        CFI_REL_OFFSET r8,R8-ARGOFFSET
        CFI_REL_OFFSET r9,R9-ARGOFFSET
        CFI_REL_OFFSET r10,R10-ARGOFFSET
        CFI_REL_OFFSET r11,R11-ARGOFFSET
        cli
        TRACE_IRQS_OFF
        testl $3,CS-ARGOFFSET(%rsp)
        je retint_restore_args
        movl $_TIF_ALLWORK_MASK,%edi
        /* edi: mask to check */
int_with_check:
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        jnz int_careful
        andl $~TS_COMPAT,threadinfo_status(%rcx)
        jmp retint_swapgs

        /* Either reschedule or signal or syscall exit tracking needed. */
        /* First do a reschedule test. */
        /* edx: work, edi: workmask */
int_careful:
        bt $TIF_NEED_RESCHED,%edx
        jnc int_very_careful
        TRACE_IRQS_ON
        sti
        pushq %rdi
        CFI_ADJUST_CFA_OFFSET 8
        call schedule
        popq %rdi
        CFI_ADJUST_CFA_OFFSET -8
        cli
        TRACE_IRQS_OFF
        jmp int_with_check

        /* handle signals and tracing -- both require a full stack frame */
int_very_careful:
        TRACE_IRQS_ON
        sti
        SAVE_REST
        /* Check for syscall exit trace */
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
        jz int_signal
        pushq %rdi
        CFI_ADJUST_CFA_OFFSET 8
        leaq 8(%rsp),%rdi       # &ptregs -> arg1
        call syscall_trace_leave
        popq %rdi
        CFI_ADJUST_CFA_OFFSET -8
        andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
        cli
        TRACE_IRQS_OFF
        jmp int_restore_rest

int_signal:
        testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
        jz 1f
        movq %rsp,%rdi          # &ptregs -> arg1
        xorl %esi,%esi          # oldset -> arg2
        call do_notify_resume
1:      movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
        RESTORE_REST
        cli
        TRACE_IRQS_OFF
        jmp int_with_check
        CFI_ENDPROC
END(int_ret_from_sys_call)

/*
 * Certain special system calls that need to save a complete full stack frame.
 */
.macro PTREGSCALL label,func,arg
        .globl \label
\label:
        leaq \func(%rip),%rax
        leaq -ARGOFFSET+8(%rsp),\arg    /* 8 for return address */
        jmp ptregscall_common
END(\label)
.endm

        CFI_STARTPROC

        PTREGSCALL stub_clone, sys_clone, %r8
        PTREGSCALL stub_fork, sys_fork, %rdi
        PTREGSCALL stub_vfork, sys_vfork, %rdi
        PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
        PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
        PTREGSCALL stub_iopl, sys_iopl, %rsi
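
/*
 * Editorial note (not part of the original file): each PTREGSCALL line
 * above expands into a small stub; e.g. "PTREGSCALL stub_fork, sys_fork, %rdi"
 * becomes, in effect:
 *
 *      stub_fork:
 *              leaq sys_fork(%rip),%rax
 *              leaq -ARGOFFSET+8(%rsp),%rdi    # 8 for return address
 *              jmp ptregscall_common
 */
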
ENTRY(ptregscall_common)
        popq %r11
        CFI_ADJUST_CFA_OFFSET -8
        CFI_REGISTER rip, r11
        SAVE_REST
        movq %r11, %r15
        CFI_REGISTER rip, r15
        FIXUP_TOP_OF_STACK %r11
        call *%rax
        RESTORE_TOP_OF_STACK %r11
        movq %r15, %r11
        CFI_REGISTER rip, r11
        RESTORE_REST
        pushq %r11
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rip, 0
        ret
        CFI_ENDPROC
END(ptregscall_common)

ENTRY(stub_execve)
        CFI_STARTPROC
        popq %r11
        CFI_ADJUST_CFA_OFFSET -8
        CFI_REGISTER rip, r11
        SAVE_REST
        FIXUP_TOP_OF_STACK %r11
        call sys_execve
        RESTORE_TOP_OF_STACK %r11
        movq %rax,RAX(%rsp)
        RESTORE_REST
        jmp int_ret_from_sys_call
        CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
        CFI_STARTPROC
        addq $8, %rsp
        CFI_ADJUST_CFA_OFFSET -8
        SAVE_REST
        movq %rsp,%rdi
        FIXUP_TOP_OF_STACK %r11
        call sys_rt_sigreturn
        movq %rax,RAX(%rsp)     # fixme, this could be done at the higher layer
        RESTORE_REST
        jmp int_ret_from_sys_call
        CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * initial frame state for interrupts and exceptions
 */
.macro _frame ref
        CFI_STARTPROC simple
        CFI_DEF_CFA rsp,SS+8-\ref
        /*CFI_REL_OFFSET ss,SS-\ref*/
        CFI_REL_OFFSET rsp,RSP-\ref
        /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
        /*CFI_REL_OFFSET cs,CS-\ref*/
        CFI_REL_OFFSET rip,RIP-\ref
.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX
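
/*
 * Editorial note (not part of the original file): with INTR_FRAME the CPU
 * has pushed only ss, rsp, rflags, cs and rip, so the frame reference is
 * RIP; with XCPT_FRAME an error code has also been pushed (it ends up in
 * the orig_rax slot), hence the ORIG_RAX reference passed to _frame.
 */
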
/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
.macro interrupt func
        cld
        SAVE_ARGS
        leaq -ARGOFFSET(%rsp),%rdi      # arg1 for handler
        pushq %rbp
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rbp, 0
        movq %rsp,%rbp
        CFI_DEF_CFA_REGISTER rbp
        testl $3,CS(%rdi)
        je 1f
        swapgs
1:      incl %gs:pda_irqcount           # RED-PEN should check preempt count
        cmoveq %gs:pda_irqstackptr,%rsp
        push %rbp                       # backlink for old unwinder
        /*
         * We entered an interrupt context - irqs are off:
         */
        TRACE_IRQS_OFF
        call \func
.endm

ENTRY(common_interrupt)
        XCPT_FRAME
        interrupt do_IRQ
        /* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
        cli
        TRACE_IRQS_OFF
        decl %gs:pda_irqcount
        leaveq
        CFI_DEF_CFA_REGISTER rsp
        CFI_ADJUST_CFA_OFFSET -8
exit_intr:
        GET_THREAD_INFO(%rcx)
        testl $3,CS-ARGOFFSET(%rsp)
        je retint_kernel

        /* Interrupt came from user space */
        /*
         * Has a correct top of stack, but a partial stack frame
         * %rcx: thread info. Interrupts off.
         */
retint_with_reschedule:
        movl $_TIF_WORK_MASK,%edi
retint_check:
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        CFI_REMEMBER_STATE
        jnz retint_careful
retint_swapgs:
        /*
         * The iretq could re-enable interrupts:
         */
        cli
        TRACE_IRQS_IRETQ
        swapgs
        jmp restore_args

retint_restore_args:
        cli
        /*
         * The iretq could re-enable interrupts:
         */
        TRACE_IRQS_IRETQ
restore_args:
        RESTORE_ARGS 0,8,0
iret_label:
        iretq

        .section __ex_table,"a"
        .quad iret_label,bad_iret
        .previous
        .section .fixup,"ax"
        /* force a signal here? this matches i386 behaviour */
        /* running with kernel gs */
bad_iret:
        movq $11,%rdi           /* SIGSEGV */
        TRACE_IRQS_ON
        sti
        jmp do_exit
        .previous

        /* edi: workmask, edx: work */
retint_careful:
        CFI_RESTORE_STATE
        bt $TIF_NEED_RESCHED,%edx
        jnc retint_signal
        TRACE_IRQS_ON
        sti
        pushq %rdi
        CFI_ADJUST_CFA_OFFSET 8
        call schedule
        popq %rdi
        CFI_ADJUST_CFA_OFFSET -8
        GET_THREAD_INFO(%rcx)
        cli
        TRACE_IRQS_OFF
        jmp retint_check

retint_signal:
        testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        jz retint_swapgs
        TRACE_IRQS_ON
        sti
        SAVE_REST
        movq $-1,ORIG_RAX(%rsp)
        xorl %esi,%esi          # oldset
        movq %rsp,%rdi          # &pt_regs
        call do_notify_resume
        RESTORE_REST
        cli
        TRACE_IRQS_OFF
        movl $_TIF_NEED_RESCHED,%edi
        GET_THREAD_INFO(%rcx)
        jmp retint_check

#ifdef CONFIG_PREEMPT
        /* Returning to kernel space. Check if we need preemption */
        /* rcx: threadinfo. interrupts off. */
ENTRY(retint_kernel)
        cmpl $0,threadinfo_preempt_count(%rcx)
        jnz retint_restore_args
        bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
        jnc retint_restore_args
        bt $9,EFLAGS-ARGOFFSET(%rsp)    /* interrupts off? */
        jnc retint_restore_args
        call preempt_schedule_irq
        jmp exit_intr
#endif

        CFI_ENDPROC
END(common_interrupt)
/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
        INTR_FRAME
        pushq $~(\num)
        CFI_ADJUST_CFA_OFFSET 8
        interrupt \func
        jmp ret_from_intr
        CFI_ENDPROC
.endm

ENTRY(thermal_interrupt)
        apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
        apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
        apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
        apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
.endm

        INVALIDATE_ENTRY 0
        INVALIDATE_ENTRY 1
        INVALIDATE_ENTRY 2
        INVALIDATE_ENTRY 3
        INVALIDATE_ENTRY 4
        INVALIDATE_ENTRY 5
        INVALIDATE_ENTRY 6
        INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
        apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
        apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(error_interrupt)
        apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
        apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
#endif

/*
 * Exception entry points.
 */
.macro zeroentry sym
        INTR_FRAME
        pushq $0        /* push error code/oldrax */
        CFI_ADJUST_CFA_OFFSET 8
        pushq %rax      /* push real oldrax to the rdi slot */
        CFI_ADJUST_CFA_OFFSET 8
        leaq \sym(%rip),%rax
        jmp error_entry
        CFI_ENDPROC
.endm

.macro errorentry sym
        XCPT_FRAME
        pushq %rax
        CFI_ADJUST_CFA_OFFSET 8
        leaq \sym(%rip),%rax
        jmp error_entry
        CFI_ENDPROC
.endm

        /* error code is on the stack already */
        /* handle NMI-like exceptions that can happen everywhere */
.macro paranoidentry sym, ist=0, irqtrace=1
        SAVE_ALL
        cld
        movl $1,%ebx
        movl $MSR_GS_BASE,%ecx
        rdmsr
        testl %edx,%edx
        js 1f
        swapgs
        xorl %ebx,%ebx
1:
        .if \ist
        movq %gs:pda_data_offset, %rbp
        .endif
        movq %rsp,%rdi
        movq ORIG_RAX(%rsp),%rsi
        movq $-1,ORIG_RAX(%rsp)
        .if \ist
        subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
        .endif
        call \sym
        .if \ist
        addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
        .endif
        cli
        .if \irqtrace
        TRACE_IRQS_OFF
        .endif
.endm

/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard flags at once, atomically)
 */
.macro paranoidexit trace=1
        /* ebx: no swapgs flag */
paranoid_exit\trace:
        testl %ebx,%ebx                 /* swapgs needed? */
        jnz paranoid_restore\trace
        testl $3,CS(%rsp)
        jnz paranoid_userspace\trace
paranoid_swapgs\trace:
        TRACE_IRQS_IRETQ 0
        swapgs
paranoid_restore\trace:
        RESTORE_ALL 8
        iretq
paranoid_userspace\trace:
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%ebx
        andl $_TIF_WORK_MASK,%ebx
        jz paranoid_swapgs\trace
        movq %rsp,%rdi                  /* &pt_regs */
        call sync_regs
        movq %rax,%rsp                  /* switch stack for scheduling */
        testl $_TIF_NEED_RESCHED,%ebx
        jnz paranoid_schedule\trace
        movl %ebx,%edx                  /* arg3: thread flags */
        .if \trace
        TRACE_IRQS_ON
        .endif
        sti
        xorl %esi,%esi                  /* arg2: oldset */
        movq %rsp,%rdi                  /* arg1: &pt_regs */
        call do_notify_resume
        cli
        .if \trace
        TRACE_IRQS_OFF
        .endif
        jmp paranoid_userspace\trace
paranoid_schedule\trace:
        .if \trace
        TRACE_IRQS_ON
        .endif
        sti
        call schedule
        cli
        .if \trace
        TRACE_IRQS_OFF
        .endif
        jmp paranoid_userspace\trace
        CFI_ENDPROC
.endm
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
        _frame RDI
        /* rdi slot contains rax, oldrax contains error code */
        cld
        subq $14*8,%rsp
        CFI_ADJUST_CFA_OFFSET (14*8)
        movq %rsi,13*8(%rsp)
        CFI_REL_OFFSET rsi,RSI
        movq 14*8(%rsp),%rsi    /* load rax from rdi slot */
        movq %rdx,12*8(%rsp)
        CFI_REL_OFFSET rdx,RDX
        movq %rcx,11*8(%rsp)
        CFI_REL_OFFSET rcx,RCX
        movq %rsi,10*8(%rsp)    /* store rax */
        CFI_REL_OFFSET rax,RAX
        movq %r8, 9*8(%rsp)
        CFI_REL_OFFSET r8,R8
        movq %r9, 8*8(%rsp)
        CFI_REL_OFFSET r9,R9
        movq %r10,7*8(%rsp)
        CFI_REL_OFFSET r10,R10
        movq %r11,6*8(%rsp)
        CFI_REL_OFFSET r11,R11
        movq %rbx,5*8(%rsp)
        CFI_REL_OFFSET rbx,RBX
        movq %rbp,4*8(%rsp)
        CFI_REL_OFFSET rbp,RBP
        movq %r12,3*8(%rsp)
        CFI_REL_OFFSET r12,R12
        movq %r13,2*8(%rsp)
        CFI_REL_OFFSET r13,R13
        movq %r14,1*8(%rsp)
        CFI_REL_OFFSET r14,R14
        movq %r15,(%rsp)
        CFI_REL_OFFSET r15,R15
        xorl %ebx,%ebx
        testl $3,CS(%rsp)
        je error_kernelspace
error_swapgs:
        swapgs
error_sti:
        movq %rdi,RDI(%rsp)
        movq %rsp,%rdi
        movq ORIG_RAX(%rsp),%rsi        /* get error code */
        movq $-1,ORIG_RAX(%rsp)
        call *%rax
        /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
        movl %ebx,%eax
        RESTORE_REST
        cli
        TRACE_IRQS_OFF
        GET_THREAD_INFO(%rcx)
        testl %eax,%eax
        jne retint_kernel
        movl threadinfo_flags(%rcx),%edx
        movl $_TIF_WORK_MASK,%edi
        andl %edi,%edx
        jnz retint_careful
        /*
         * The iret might restore flags:
         */
        TRACE_IRQS_IRETQ
        swapgs
        RESTORE_ARGS 0,8,0
        jmp iret_label
        CFI_ENDPROC

error_kernelspace:
        incl %ebx
        /* There are two places in the kernel that can potentially fault with
           usergs. Handle them here. The exception handlers after
           iret run with kernel gs again, so don't set the user space flag.
           B stepping K8s sometimes report a truncated RIP for IRET
           exceptions returning to compat mode. Check for these here too. */
        leaq iret_label(%rip),%rbp
        cmpq %rbp,RIP(%rsp)
        je error_swapgs
        movl %ebp,%ebp          /* zero extend */
        cmpq %rbp,RIP(%rsp)
        je error_swapgs
        cmpq $gs_change,RIP(%rsp)
        je error_swapgs
        jmp error_sti
END(error_entry)
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(load_gs_index)
        CFI_STARTPROC
        pushf
        CFI_ADJUST_CFA_OFFSET 8
        cli
        swapgs
gs_change:
        movl %edi,%gs
2:      mfence                  /* workaround */
        swapgs
        popf
        CFI_ADJUST_CFA_OFFSET -8
        ret
        CFI_ENDPROC
ENDPROC(load_gs_index)

        .section __ex_table,"a"
        .align 8
        .quad gs_change,bad_gs
        .previous
        .section .fixup,"ax"
        /* running with kernelgs */
bad_gs:
        swapgs                  /* switch back to user gs */
        xorl %eax,%eax
        movl %eax,%gs
        jmp 2b
        .previous

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *      extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *      rdi: fn, rsi: arg, rdx: flags
 */
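
/*
 * Editorial illustration (hypothetical caller, not part of this file):
 * a kernel-side user might write
 *
 *      pid = kernel_thread(my_thread_fn, my_data, CLONE_FS | CLONE_FILES);
 *
 * where my_thread_fn/my_data are placeholders; the code below maps this
 * C interface onto a do_fork() call using a faked interrupt frame whose
 * return rip is child_rip.
 */
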
ENTRY(kernel_thread)
        CFI_STARTPROC
        FAKE_STACK_FRAME $child_rip
        SAVE_ALL

        # rdi: flags, rsi: usp, rdx: will be &pt_regs
        movq %rdx,%rdi
        orq kernel_thread_flags(%rip),%rdi
        movq $-1, %rsi
        movq %rsp, %rdx

        xorl %r8d,%r8d
        xorl %r9d,%r9d

        # clone now
        call do_fork
        movq %rax,RAX(%rsp)
        xorl %edi,%edi

        /*
         * It isn't worth checking for a reschedule here,
         * so internally to the x86_64 port you can rely on kernel_thread()
         * not to reschedule the child before returning; this avoids the need
         * for hacks, for example to fork off the per-CPU idle tasks.
         * [Hopefully no generic code relies on the reschedule -AK]
         */
        RESTORE_ALL
        UNFAKE_STACK_FRAME
        ret
        CFI_ENDPROC
ENDPROC(kernel_thread)

child_rip:
        pushq $0                # fake return address
        CFI_STARTPROC
        /*
         * Here we are in the child and the registers are set as they were
         * at kernel_thread() invocation in the parent.
         */
        movq %rdi, %rax
        movq %rsi, %rdi
        call *%rax
        # exit
        xorl %edi, %edi
        call do_exit
        CFI_ENDPROC
ENDPROC(child_rip)

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *      extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *      rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *      extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *      rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
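
/*
 * Editorial illustration (hypothetical in-kernel caller, not part of this
 * file): early boot code can start user space with something like
 *
 *      error = execve("/sbin/init", argv_init, envp_init);
 *
 * which enters here, builds the fake pt_regs frame below and then calls
 * sys_execve() with it.
 */
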
ENTRY(execve)
        CFI_STARTPROC
        FAKE_STACK_FRAME $0
        SAVE_ALL
        call sys_execve
        movq %rax, RAX(%rsp)
        RESTORE_REST
        testq %rax,%rax
        je int_ret_from_sys_call
        RESTORE_ARGS
        UNFAKE_STACK_FRAME
        ret
        CFI_ENDPROC
ENDPROC(execve)

KPROBE_ENTRY(page_fault)
        errorentry do_page_fault
END(page_fault)
        .previous .text

ENTRY(coprocessor_error)
        zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
        zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
        zeroentry math_state_restore
END(device_not_available)

        /* runs on exception stack */
KPROBE_ENTRY(debug)
        INTR_FRAME
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_debug, DEBUG_STACK
        paranoidexit
END(debug)
        .previous .text

        /* runs on exception stack */
KPROBE_ENTRY(nmi)
        INTR_FRAME
        pushq $-1
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
        paranoidexit 0
#else
        jmp paranoid_exit1
        CFI_ENDPROC
#endif
END(nmi)
        .previous .text

KPROBE_ENTRY(int3)
        INTR_FRAME
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_int3, DEBUG_STACK
        jmp paranoid_exit1
        CFI_ENDPROC
END(int3)
        .previous .text

ENTRY(overflow)
        zeroentry do_overflow
END(overflow)

ENTRY(bounds)
        zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
        zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
        zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

ENTRY(reserved)
        zeroentry do_reserved
END(reserved)

        /* runs on exception stack */
ENTRY(double_fault)
        XCPT_FRAME
        paranoidentry do_double_fault
        jmp paranoid_exit1
        CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
        errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
        errorentry do_segment_not_present
END(segment_not_present)

        /* runs on exception stack */
ENTRY(stack_segment)
        XCPT_FRAME
        paranoidentry do_stack_segment
        jmp paranoid_exit1
        CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
        errorentry do_general_protection
END(general_protection)
        .previous .text

ENTRY(alignment_check)
        errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
        zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
        zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
        /* runs on exception stack */
ENTRY(machine_check)
        INTR_FRAME
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_machine_check
        jmp paranoid_exit1
        CFI_ENDPROC
END(machine_check)
#endif

/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
        CFI_STARTPROC
        push %rbp
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rbp,0
        mov %rsp,%rbp
        CFI_DEF_CFA_REGISTER rbp
        incl %gs:pda_irqcount
        cmove %gs:pda_irqstackptr,%rsp
        push %rbp               # backlink for old unwinder
        call __do_softirq
        leaveq
        CFI_DEF_CFA_REGISTER rsp
        CFI_ADJUST_CFA_OFFSET -8
        decl %gs:pda_irqcount
        ret
        CFI_ENDPROC
ENDPROC(call_softirq)

#ifdef CONFIG_STACK_UNWIND
ENTRY(arch_unwind_init_running)
        CFI_STARTPROC
        movq %r15, R15(%rdi)
        movq %r14, R14(%rdi)
        xchgq %rsi, %rdx
        movq %r13, R13(%rdi)
        movq %r12, R12(%rdi)
        xorl %eax, %eax
        movq %rbp, RBP(%rdi)
        movq %rbx, RBX(%rdi)
        movq (%rsp), %rcx
        movq %rax, R11(%rdi)
        movq %rax, R10(%rdi)
        movq %rax, R9(%rdi)
        movq %rax, R8(%rdi)
        movq %rax, RAX(%rdi)
        movq %rax, RCX(%rdi)
        movq %rax, RDX(%rdi)
        movq %rax, RSI(%rdi)
        movq %rax, RDI(%rdi)
        movq %rax, ORIG_RAX(%rdi)
        movq %rcx, RIP(%rdi)
        leaq 8(%rsp), %rcx
        movq $__KERNEL_CS, CS(%rdi)
        movq %rax, EFLAGS(%rdi)
        movq %rcx, RSP(%rdi)
        movq $__KERNEL_DS, SS(%rdi)
        jmpq *%rdx
        CFI_ENDPROC
ENDPROC(arch_unwind_init_running)
#endif