/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *
 * $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like the partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
#define ASSEMBLY 1
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

	.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt	$9,EFLAGS-\offset(%rsp)	/* interrupts off? */
	jnc	1f
	TRACE_IRQS_ON
1:
#endif
	.endm
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL-based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp: at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
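	/*
	 * SYSCALL clobbered %rcx with the return %rip (and %r11 with the user
	 * RFLAGS), so the saved RCX slot does not hold a user value; poison it
	 * and rebuild EFLAGS from the R11 slot below.
	 */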
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm

	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl	%eax, %eax
	pushq	%rax		/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm

	.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
	.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je  int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
END(ret_from_fork)
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack
 *	frame and report it properly in ps. Unfortunately we don't have one.
 *
 * Whenever the user can change the frame, always force IRET: it deals with
 * non-canonical addresses better. SYSRET has trouble with them due to bugs
 * in both AMD and Intel CPUs.
 */
ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
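	/*
	 * SYSCALL switches neither the stack nor %gs: swap to the kernel GS
	 * base (the per-CPU PDA), stash the user %rsp and load this CPU's
	 * kernel stack pointer before anything is pushed.
	 */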
	swapgs
	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	sti
	SAVE_ARGS 8,1
	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq	%rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
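	/* the syscall ABI passes arg4 in %r10 because SYSCALL clobbers %rcx;
	   put it back into %rcx where the C calling convention expects it */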
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	TRACE_IRQS_OFF
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  sysret_careful
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER	rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
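	/*
	 * Restore the user stack pointer saved at entry, switch back to the
	 * user GS base and return; SYSRETQ reloads %rip from %rcx and RFLAGS
	 * from %r11.
	 */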
	movq	%gs:pda_oldrsp,%rsp
	swapgs
	sysretq

	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	sti
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    1f
	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	cli
	TRACE_IRQS_OFF
	jmp int_with_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	CFI_RESTORE_STATE
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja  1f
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
1:	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(system_call)

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET	r8,R8-ARGOFFSET
	CFI_REL_OFFSET	r9,R9-ARGOFFSET
	CFI_REL_OFFSET	r10,R10-ARGOFFSET
	CFI_REL_OFFSET	r11,R11-ARGOFFSET
	cli
	TRACE_IRQS_OFF
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz   int_careful
	andl  $~TS_COMPAT,threadinfo_status(%rcx)
	jmp   retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc  int_very_careful
	TRACE_IRQS_ON
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	TRACE_IRQS_OFF
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	TRACE_IRQS_OFF
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(int_ret_from_sys_call)
/*
 * Certain special system calls need to save a complete stack frame.
 */
	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
END(\label)
	.endm
	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
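	/*
	 * Pop the return address pushed by the call through sys_call_table so
	 * the pt_regs offsets line up for SAVE_REST, park it in the
	 * callee-saved %r15 across the C call, and push it back before ret.
	 */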
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)
ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)
/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
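	/* discard the return address pushed by the call through sys_call_table:
	   sigreturn rebuilds the whole frame and exits via int_ret_from_sys_call */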
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */
/* 0(%rsp): interrupt number */
	.macro interrupt func
	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
	pushq %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET		rbp, 0
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
	testl $3,CS(%rdi)
	je 1f
	swapgs
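	/*
	 * pda_irqcount starts at -1, so the first (outermost) interrupt
	 * increments it to 0 and sets ZF; cmoveq then switches to the per-CPU
	 * interrupt stack only when we are not already running on it.
	 */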
1:	incl	%gs:pda_irqcount	# RED-PEN should check preempt count
	cmoveq %gs:pda_irqstackptr,%rsp
	push	%rbp			# backlink for old unwinder
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	call \func
	.endm

ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	cli
	TRACE_IRQS_OFF
	decl %gs:pda_irqcount
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  retint_careful
retint_swapgs:
	/*
	 * The iretq could re-enable interrupts:
	 */
	cli
	TRACE_IRQS_IRETQ
	swapgs
	jmp restore_args

retint_restore_args:
	cli
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq $11,%rdi	/* SIGSEGV */
	TRACE_IRQS_ON
	sti
	jmp do_exit
	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt    $TIF_NEED_RESCHED,%edx
	jnc   retint_signal
	TRACE_IRQS_ON
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET	8
	call  schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	cli
	TRACE_IRQS_OFF
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    retint_swapgs
	TRACE_IRQS_ON
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz  retint_restore_args
	bt  $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)
/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq $~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
#endif

/*
 * Exception entry points.
 */
	.macro zeroentry sym
	INTR_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm
	.macro errorentry sym
	XCPT_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	/* error code is on the stack already */
	/* handle NMI like exceptions that can happen everywhere */
	.macro paranoidentry sym, ist=0, irqtrace=1
	SAVE_ALL
	cld
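	/*
	 * This can fire with either the user or the kernel GS base loaded
	 * (e.g. an NMI in the middle of the entry code), so CS alone cannot
	 * be trusted.  Read MSR_GS_BASE instead: a negative (kernel-half)
	 * value means kernel GS is already active.  %ebx records whether a
	 * swapgs was done (0) or not (1) so the exit path can undo it.
	 */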
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js    1f
	swapgs
	xorl  %ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	.endm
/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only the NMI
 * case should be common, and NMIs do not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe (we cannot change the soft and
 * hard flags at once, atomically).
 */
	.macro paranoidexit trace=1
	/* ebx:	no swapgs flag */
paranoid_exit\trace:
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore\trace
	testl $3,CS(%rsp)
	jnz   paranoid_userspace\trace
paranoid_swapgs\trace:
	TRACE_IRQS_IRETQ 0
	swapgs
paranoid_restore\trace:
	RESTORE_ALL 8
	iretq
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs\trace
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule\trace
	movl %ebx,%edx			/* arg3: thread flags */
	.if \trace
	TRACE_IRQS_ON
	.endif
	sti
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	cli
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
paranoid_schedule\trace:
	.if \trace
	TRACE_IRQS_ON
	.endif
	sti
	call schedule
	cli
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
	CFI_ENDPROC
	.endm
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	_frame RDI
	/* rdi slot contains rax, oldrax contains error code */
	cld
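	/*
	 * Make room for the 14 remaining pt_regs slots (rsi down to r15);
	 * the rdi slot (currently holding the entry %rax) and the error
	 * code/orig_rax slot were already pushed by the zeroentry/errorentry
	 * stubs or the CPU.
	 */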
	subq  $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je  error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne  retint_kernel
	movl  threadinfo_flags(%rcx),%edx
	movl  $_TIF_WORK_MASK,%edi
	andl  %edi,%edx
	jnz  retint_careful
	/*
	 * The iret might restore flags:
	 */
	TRACE_IRQS_IRETQ
	swapgs
	RESTORE_ARGS 0,8,0
	jmp iret_label
	CFI_ENDPROC
error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after iret run with
	   kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET exceptions
	   returning to compat mode. Check for these here too. */
	leaq iret_label(%rip),%rbp
	cmpq %rbp,RIP(%rsp)
	je   error_swapgs
	movl %ebp,%ebp	/* zero extend */
	cmpq %rbp,RIP(%rsp)
	je   error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je   error_swapgs
	jmp error_sti
END(error_entry)
	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp  2b
	.previous
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here, so within the x86_64
	 * port you can rely on kernel_thread() not rescheduling the child
	 * before returning; this avoids the need for hacks, for example to
	 * fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)
child_rip:
	pushq $0		# fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorl %edi, %edi
	call do_exit
	CFI_ENDPROC
ENDPROC(child_rip)
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(execve)
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
END(page_fault)
	.previous .text

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry math_state_restore
END(device_not_available)

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	paranoidexit
END(debug)
	.previous .text

	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
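	/* the -1 pushed above is a dummy value for the error code/orig_rax
	   slot: NMI delivers no error code and this is not a system call */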
	paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
	paranoidexit 0
#else
	jmp paranoid_exit1
	CFI_ENDPROC
#endif
END(nmi)
	.previous .text

KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit1
	CFI_ENDPROC
END(int3)
	.previous .text

ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(bounds)
	zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

ENTRY(reserved)
	zeroentry do_reserved
END(reserved)

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit1
	CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit1
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
END(general_protection)
	.previous .text

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit1
	CFI_ENDPROC
END(machine_check)
#endif
	/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	push %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET rbp,0
	mov  %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
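	/* same irq-stack switch as the interrupt entry: pda_irqcount goes from
	   -1 to 0 on the outermost entry, and only then is %rsp moved to the
	   per-CPU interrupt stack */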
	incl %gs:pda_irqcount
	cmove %gs:pda_irqstackptr,%rsp
	push  %rbp			# backlink for old unwinder
	call __do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)
#ifdef CONFIG_STACK_UNWIND
ENTRY(arch_unwind_init_running)
	CFI_STARTPROC
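	/*
	 * Capture the caller's register state into the pt_regs-style frame at
	 * %rdi: callee-saved registers keep their live values, caller-clobbered
	 * slots are zeroed, RIP is set to the return address and RSP to the
	 * caller's stack.  Then jump to the callback that arrived in %rsi,
	 * with %rdi still as its first argument and the original %rdx as its
	 * second (hence the xchg).
	 */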
	movq	%r15, R15(%rdi)
	movq	%r14, R14(%rdi)
	xchgq	%rsi, %rdx
	movq	%r13, R13(%rdi)
	movq	%r12, R12(%rdi)
	xorl	%eax, %eax
	movq	%rbp, RBP(%rdi)
	movq	%rbx, RBX(%rdi)
	movq	(%rsp), %rcx
	movq	%rax, R11(%rdi)
	movq	%rax, R10(%rdi)
	movq	%rax, R9(%rdi)
	movq	%rax, R8(%rdi)
	movq	%rax, RAX(%rdi)
	movq	%rax, RCX(%rdi)
	movq	%rax, RDX(%rdi)
	movq	%rax, RSI(%rdi)
	movq	%rax, RDI(%rdi)
	movq	%rax, ORIG_RAX(%rdi)
	movq	%rcx, RIP(%rdi)
	leaq	8(%rsp), %rcx
	movq	$__KERNEL_CS, CS(%rdi)
	movq	%rax, EFLAGS(%rdi)
	movq	%rcx, RSP(%rdi)
	movq	$__KERNEL_DS, SS(%rdi)
	jmpq	*%rdx
	CFI_ENDPROC
ENDPROC(arch_unwind_init_running)
#endif