/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 */
/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 * at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 * backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 * There are unfortunately lots of special cases where some registers are
 * not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 * Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 * frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_64BIT 0x80000000
#define __AUDIT_ARCH_LE 0x40000000
	.code64
#ifdef CONFIG_FTRACE
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)
	movq 0x38(%rsp), %rdi
	subq $MCOUNT_INSN_SIZE, %rdi
	.globl mcount_call
mcount_call:
	call ftrace_stub
	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp
	retq
END(mcount)
ENTRY(ftrace_caller)
	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)
	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi
	.globl ftrace_call
ftrace_call:
	call ftrace_stub
	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp
	.globl ftrace_stub
ftrace_stub:
	retq
END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
	cmpq $ftrace_stub, ftrace_trace_function
	jnz trace
	.globl ftrace_stub
ftrace_stub:
	retq
trace:
	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)
	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi
	call *ftrace_trace_function
	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp
	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FTRACE */
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
	swapgs
	sysretq
#endif /* CONFIG_PARAVIRT */
	.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
	jnc 1f
	TRACE_IRQS_ON
1:
#endif
	.endm
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
/* %rsp: at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq %gs:pda_oldrsp,\tmp
	movq \tmp,RSP(%rsp)
	movq $__USER_DS,SS(%rsp)
	movq $__USER_CS,CS(%rsp)
	movq $-1,RCX(%rsp)
	movq R11(%rsp),\tmp /* get eflags */
	movq \tmp,EFLAGS(%rsp)
	.endm
	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq RSP-\offset(%rsp),\tmp
	movq \tmp,%gs:pda_oldrsp
	movq EFLAGS-\offset(%rsp),\tmp
	movq \tmp,R11-\offset(%rsp)
	.endm
	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq $__KERNEL_DS /* ss */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET ss,0*/
	pushq %rax /* rsp */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rsp,0
	pushq $(1<<9) /* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET rflags,0*/
	pushq $__KERNEL_CS /* cs */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET cs,0*/
	pushq \child_rip /* rip */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip,0
	pushq %rax /* orig rax */
	CFI_ADJUST_CFA_OFFSET 8
	.endm
	.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET -(6*8)
	.endm
	.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET r15,R15
	CFI_REL_OFFSET r14,R14
	CFI_REL_OFFSET r13,R13
	CFI_REL_OFFSET r12,R12
	CFI_REL_OFFSET rbp,RBP
	CFI_REL_OFFSET rbx,RBX
	CFI_REL_OFFSET r11,R11
	CFI_REL_OFFSET r10,R10
	CFI_REL_OFFSET r9,R9
	CFI_REL_OFFSET r8,R8
	CFI_REL_OFFSET rax,RAX
	CFI_REL_OFFSET rcx,RCX
	CFI_REL_OFFSET rdx,RDX
	CFI_REL_OFFSET rsi,RSI
	CFI_REL_OFFSET rdi,RDI
	CFI_REL_OFFSET rip,RIP
	/*CFI_REL_OFFSET cs,CS*/
	/*CFI_REL_OFFSET rflags,EFLAGS*/
	CFI_REL_OFFSET rsp,RSP
	/*CFI_REL_OFFSET ss,SS*/
	.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi: prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	push kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET 8
	popf # reset kernel eflags
	CFI_ADJUST_CFA_OFFSET -8
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
	je int_ret_from_sys_call
	testl $_TIF_IA32,TI_flags(%rcx)
	jnz int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
END(ret_from_fork)
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */
/*
 * Register setup:
 * rax system call number
 * rdi arg0
 * rcx return address for syscall/sysret, C arg3
 * rsi arg1
 * rdx arg2
 * r10 arg3 (--> moved to rcx for C)
 * r8 arg4
 * r9 arg5
 * r11 eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX if we had a free scratch register we could save the RSP into the stack
 * frame and report it properly in ps. Unfortunately we don't have one.
 *
 * When the user can change the frames, always force IRET. That is because
 * it deals with non-canonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
ENTRY(system_call)
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,PDA_STACKOFFSET
	CFI_REGISTER rip,rcx
	/*CFI_REGISTER rflags,r11*/
	SWAPGS_UNSAFE_STACK
/*
 * A hypervisor implementation might want to use a label
 * after the swapgs, so that it can do the swapgs
 * for the guest and jump here on syscall.
 */
ENTRY(system_call_after_swapgs)
	movq %rsp,%gs:pda_oldrsp
	movq %gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_ARGS 8,1
	movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
	jnz tracesys
system_call_fastpath:
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8) # XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi: flagmask */
sysret_check:
	LOCKDEP_SYS_EXIT
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER rflags,r11*/
	movq %gs:pda_oldrsp, %rsp
	USERGS_SYSRET64
	CFI_RESTORE_STATE
	/* Handle reschedules */
	/* edx: work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check
	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
#ifdef CONFIG_AUDITSYSCALL
	bt $TIF_SYSCALL_AUDIT,%edx
	jc sysret_audit
#endif
	/* edx: work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
	xorl %esi,%esi # oldset -> arg2
	call ptregscall_common
	movl $_TIF_WORK_MASK,%edi
	/* Use IRET because the user could have changed the frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check
badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call
#ifdef CONFIG_AUDITSYSCALL
	/*
	 * Fast path for syscall audit without full syscall trace.
	 * We just call audit_syscall_entry() directly, and then
	 * jump back to the normal fast path.
	 */
auditsys:
	movq %r10,%r9 /* 6th arg: 4th syscall arg */
	movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
	movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
	movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
	movq %rax,%rsi /* 2nd arg: syscall number */
	movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
	call audit_syscall_entry
	LOAD_ARGS 0 /* reload call-clobbered registers */
	jmp system_call_fastpath
	/*
	 * Return fast path for syscall audit. Call audit_syscall_exit()
	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
	 * masked off.
	 */
sysret_audit:
	movq %rax,%rsi /* second arg, syscall return value */
	cmpq $0,%rax /* is it < 0? */
	setl %al /* 1 if so, 0 if not */
	movzbl %al,%edi /* zero-extend that into %edi */
	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
	call audit_syscall_exit
	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
	jmp sysret_check
#endif /* CONFIG_AUDITSYSCALL */
	/* Do syscall tracing */
tracesys:
#ifdef CONFIG_AUDITSYSCALL
	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jz auditsys
#endif
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * We don't reload %rax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	LOAD_ARGS ARGOFFSET, 1
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
	movq %r10,%rcx /* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
	.globl int_ret_from_sys_call
	.globl int_with_check
int_ret_from_sys_call:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi: mask to check */
int_with_check:
	LOCKDEP_SYS_EXIT_IRQ
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz int_careful
	andl $~TS_COMPAT,TI_status(%rcx)
	jmp retint_swapgs
	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx: work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc int_very_careful
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check
	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	/* Check for syscall exit trace */
	testl $_TIF_WORK_SYSCALL_EXIT,%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi # &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
	jmp int_restore_rest
int_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz 1f
	movq %rsp,%rdi # &ptregs -> arg1
	xorl %esi,%esi # oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_WORK_MASK,%edi
int_restore_rest:
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(system_call)
/*
 * Certain special system calls that need to save a full stack frame.
 */
	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq \func(%rip),%rax
	leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
	jmp ptregscall_common
END(\label)
	.endm
	CFI_STARTPROC
	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi
ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)
ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	movq %rsp, %rcx
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)
/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)
/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm
/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX
/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */
/* 0(%rsp): interrupt number */
	.macro interrupt func
	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
	pushq %rbp
	/*
	 * Save rbp twice: One is for marking the stack frame, as usual, and the
	 * other, to fill pt_regs properly. This is because bx comes right
	 * before the last saved register in that structure, and not bp. If the
	 * base pointer were in the place bx is today, this would not be needed.
	 */
	movq %rbp, -8(%rsp)
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbp, 0
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	testl $3,CS(%rdi)
	je 1f
	SWAPGS
	/* irqcount is used to check if a CPU is already on an interrupt
	   stack or not. While this is essentially redundant with preempt_count
	   it is a little cheaper to use a separate counter in the PDA
	   (short of moving irq_enter into assembly, which would be too
	   much work) */
1:	incl %gs:pda_irqcount
	cmoveq %gs:pda_irqstackptr,%rsp
	push %rbp # backlink for old unwinder
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	call \func
	.endm
ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	decl %gs:pda_irqcount
	leaveq
	CFI_DEF_CFA_REGISTER rsp
	CFI_ADJUST_CFA_OFFSET -8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel
	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz retint_careful
retint_swapgs: /* return to user-space */
	/*
	 * The iretq could re-enable interrupts:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_IRETQ
	SWAPGS
	jmp restore_args
retint_restore_args: /* return to kernel space */
	DISABLE_INTERRUPTS(CLBR_ANY)
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0
irq_return:
	INTERRUPT_RETURN
	.section __ex_table, "a"
	.quad irq_return, bad_iret
	.previous
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iretq
	.section __ex_table,"a"
	.quad native_iret, bad_iret
	.previous
#endif
	.section .fixup,"ax"
bad_iret:
	/*
	 * The iret traps when the %cs or %ss being restored is bogus.
	 * We've lost the original trap vector and error code.
	 * #GPF is the most likely one to get for an invalid selector.
	 * So pretend we completed the iret and took the #GPF in user mode.
	 *
	 * We are now running with the kernel GS after exception recovery.
	 * But error_entry expects us to have user GS to match the user %cs,
	 * so swap back.
	 */
	pushq $0
	SWAPGS
	jmp general_protection
	.previous
	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc retint_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp retint_check
retint_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz retint_swapgs
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi # oldset
	movq %rsp,%rdi # &pt_regs
	call do_notify_resume
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	jmp retint_with_reschedule
#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx: threadinfo. interrupts off. */
ENTRY(retint_kernel)
	cmpl $0,TI_preempt_count(%rcx)
	jnz retint_restore_args
	bt $TIF_NEED_RESCHED,TI_flags(%rcx)
	jnc retint_restore_args
	bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
	jnc retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif
	CFI_ENDPROC
END(common_interrupt)
/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq $~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm
ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)
ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)
#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)
	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
	.endm
	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7
ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
ENTRY(call_function_single_interrupt)
	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
END(call_function_single_interrupt)
ENTRY(irq_move_cleanup_interrupt)
	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
#endif
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)
ENTRY(uv_bau_message_intr1)
	apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)
ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)
ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
/*
 * Exception entry points.
 */
	.macro zeroentry sym
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0 /* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax /* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm
	.macro errorentry sym
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm
	/* error code is on the stack already */
	/* handle NMI like exceptions that can happen everywhere */
	.macro paranoidentry sym, ist=0, irqtrace=1
	SAVE_ALL
	cld
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js 1f
	SWAPGS
	xorl %ebx,%ebx
1:
	.if \ist
	movq %gs:pda_data_offset, %rbp
	.endif
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	.endm
/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard flags at once, atomically)
 */
	.macro paranoidexit trace=1
	/* ebx: no swapgs flag */
paranoid_exit\trace:
	testl %ebx,%ebx /* swapgs needed? */
	jnz paranoid_restore\trace
	testl $3,CS(%rsp)
	jnz paranoid_userspace\trace
paranoid_swapgs\trace:
	.if \trace
	TRACE_IRQS_IRETQ 0
	.endif
	SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
	RESTORE_ALL 8
	jmp irq_return
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs\trace
	movq %rsp,%rdi /* &pt_regs */
	call sync_regs
	movq %rax,%rsp /* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule\trace
	movl %ebx,%edx /* arg3: thread flags */
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi /* arg2: oldset */
	movq %rsp,%rdi /* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
paranoid_schedule\trace:
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
	CFI_ENDPROC
	.endm
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
KPROBE_ENTRY(error_entry)
	_frame RDI
	CFI_REL_OFFSET rax,0
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET (14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET rsi,RSI
	movq 14*8(%rsp),%rsi /* load rax from rdi slot */
	CFI_REGISTER rax,rsi
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET rcx,RCX
	movq %rsi,10*8(%rsp) /* store rax */
	CFI_REL_OFFSET rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je error_kernelspace
error_swapgs:
	SWAPGS
error_sti:
	TRACE_IRQS_OFF
	movq %rdi,RDI(%rsp)
	CFI_REL_OFFSET rdi,RDI
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi /* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful
	jmp retint_swapgs
	CFI_ENDPROC
error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq irq_return(%rip),%rcx
	cmpq %rcx,RIP(%rsp)
	je error_swapgs
	movl %ecx,%ecx /* zero extend */
	cmpq %rcx,RIP(%rsp)
	je error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je error_swapgs
	jmp error_sti
KPROBE_END(error_entry)
	/* Reload gs selector with exception handling */
	/* edi: new selector */
ENTRY(native_load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
	SWAPGS
gs_change:
	movl %edi,%gs
2:	mfence /* workaround */
	SWAPGS
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(native_load_gs_index)
	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	SWAPGS /* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp 2b
	.previous
/*
 * Create a kernel thread.
 *
 * C extern interface:
 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 * rdi: fn, rsi: arg, rdx: flags
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL
	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx
	xorl %r8d,%r8d
	xorl %r9d,%r9d
	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi
	/*
	 * It isn't worth checking for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not to reschedule the child before returning; this avoids the need
	 * for hacks, for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)
child_rip:
	pushq $0 # fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	mov %eax, %edi
	call do_exit
	CFI_ENDPROC
ENDPROC(child_rip)
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 * extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 * rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 * extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	movq %rsp,%rcx
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_execve)
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
KPROBE_END(page_fault)
ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)
ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)
ENTRY(device_not_available)
	zeroentry do_device_not_available
END(device_not_available)
	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	paranoidexit
KPROBE_END(debug)
	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
	paranoidexit 0
#else
	jmp paranoid_exit1
	CFI_ENDPROC
#endif
KPROBE_END(nmi)
KPROBE_ENTRY(int3)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit1
	CFI_ENDPROC
KPROBE_END(int3)
ENTRY(overflow)
	zeroentry do_overflow
END(overflow)
ENTRY(bounds)
	zeroentry do_bounds
END(bounds)
ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)
ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)
	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit1
	CFI_ENDPROC
END(double_fault)
ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)
ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)
	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit1
	CFI_ENDPROC
END(stack_segment)
KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
KPROBE_END(general_protection)
ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)
ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)
ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)
#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit1
	CFI_ENDPROC
END(machine_check)
#endif
	/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	push %rbp
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbp,0
	mov %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl %gs:pda_irqcount
	cmove %gs:pda_irqstackptr,%rsp
	push %rbp # backlink for old unwinder
	call __do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER rsp
	CFI_ADJUST_CFA_OFFSET -8
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)
KPROBE_ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
	sysret
	CFI_ENDPROC
ENDPROC(ignore_sysret)
#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
	zeroentry xen_do_hypervisor_callback
END(xen_hypervisor_callback)
/*
# A note on the "critical region" in our callback handler.
# We want to avoid stacking callback handlers due to events occurring
# during handling of the last event. To do this, we keep events disabled
# until we've done all processing. HOWEVER, we must enable events before
# popping the stack frame (can't be done atomically) and so it would still
# be possible to get enough handler activations to overflow the stack.
# Although unlikely, bugs of that kind are hard to track down, so we'd
# like to avoid the possibility.
# So, on entry to the handler we detect whether we interrupted an
# existing activation in its critical region -- if so, we pop the current
# activation and restart the handler using the previous one.
*/
ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
	CFI_STARTPROC
	/* Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
	   see the correct pointer to the pt_regs */
	movq %rdi, %rsp # we don't return, adjust the stack frame
	CFI_ENDPROC
	CFI_DEFAULT_STACK
11:	incl %gs:pda_irqcount
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	cmovzq %gs:pda_irqstackptr,%rsp
	pushq %rbp # backlink for old unwinder
	call xen_evtchn_do_upcall
	popq %rsp
	CFI_DEF_CFA_REGISTER rsp
	decl %gs:pda_irqcount
	jmp error_exit
	CFI_ENDPROC
END(do_hypervisor_callback)
/*
# Hypervisor uses this for application faults while it executes.
# We get here for two reasons:
# 1. Fault while reloading DS, ES, FS or GS
# 2. Fault while executing IRET
# Category 1 we do not need to fix up as Xen has already reloaded all segment
# registers that could be reloaded and zeroed the others.
# Category 2 we fix up by killing the current process. We cannot use the
# normal Linux return path in this case because if we use the IRET hypercall
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
# We distinguish between categories by comparing each saved segment register
# with its current contents: any discrepancy means we are in category 1.
*/
ENTRY(xen_failsafe_callback)
	framesz = (RIP-0x30) /* workaround buggy gas */
	_frame framesz
	CFI_REL_OFFSET rcx, 0
	CFI_REL_OFFSET r11, 8
	movw %ds,%cx
	cmpw %cx,0x10(%rsp)
	CFI_REMEMBER_STATE
	jne 1f
	movw %es,%cx
	cmpw %cx,0x18(%rsp)
	jne 1f
	movw %fs,%cx
	cmpw %cx,0x20(%rsp)
	jne 1f
	movw %gs,%cx
	cmpw %cx,0x28(%rsp)
	jne 1f
	/* All segments match their saved values => Category 2 (Bad IRET). */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rcx
	CFI_ADJUST_CFA_OFFSET 8
	jmp general_protection
	CFI_RESTORE_STATE
1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	SAVE_ALL
	jmp error_exit
	CFI_ENDPROC
END(xen_failsafe_callback)
#endif /* CONFIG_XEN */