/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers are
 *   not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
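/* These mirror the __AUDIT_ARCH_* flags in <linux/audit.h>: the 64BIT bit
   marks a 64-bit ABI, the LE bit marks little-endian; OR'ed with the ELF
   machine number they form the audit architecture token. */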
#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_64BIT 0x80000000
#define __AUDIT_ARCH_LE 0x40000000

	.code64

#ifdef CONFIG_FTRACE
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)
	movq 0x38(%rsp), %rdi
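	/* The saved return address points just past the call to mcount in the
	   traced function; backing up by MCOUNT_INSN_SIZE yields the address
	   of that call site, which is passed as the first argument. */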
	subq $MCOUNT_INSN_SIZE, %rdi
.globl mcount_call
mcount_call:
	call ftrace_stub
	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp
	retq
END(mcount)

ENTRY(ftrace_caller)
	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)
	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
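	/* Second argument for the tracer: the parent ip, i.e. the traced
	   function's own return address, read from its frame at 8(%rbp). */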
	subq $MCOUNT_INSN_SIZE, %rdi
.globl ftrace_call
ftrace_call:
	call ftrace_stub
	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp
.globl ftrace_stub
ftrace_stub:
	retq
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
	cmpq $ftrace_stub, ftrace_trace_function
	jnz trace
.globl ftrace_stub
ftrace_stub:
	retq

trace:
	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)
	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi
	call *ftrace_trace_function
	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp
	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FTRACE */

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
	swapgs
	sysretq
#endif /* CONFIG_PARAVIRT */

.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
	jnc 1f
	TRACE_IRQS_ON
1:
#endif
.endm
/*
 * C code is not supposed to know about undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL-based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
/* %rsp:at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
	movq %gs:pda_oldrsp,\tmp
	movq \tmp,RSP(%rsp)
	movq $__USER_DS,SS(%rsp)
	movq $__USER_CS,CS(%rsp)
	movq $-1,RCX(%rsp)
	movq R11(%rsp),\tmp /* get eflags */
	movq \tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq RSP-\offset(%rsp),\tmp
	movq \tmp,%gs:pda_oldrsp
	movq EFLAGS-\offset(%rsp),\tmp
	movq \tmp,R11-\offset(%rsp)
.endm

.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq $__KERNEL_DS /* ss */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET ss,0*/
	pushq %rax /* rsp */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rsp,0
	pushq $(1<<9) /* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET rflags,0*/
	pushq $__KERNEL_CS /* cs */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET cs,0*/
	pushq \child_rip /* rip */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip,0
	pushq %rax /* orig rax */
	CFI_ADJUST_CFA_OFFSET 8
.endm

.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET -(6*8)
.endm

.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET r15,R15
	CFI_REL_OFFSET r14,R14
	CFI_REL_OFFSET r13,R13
	CFI_REL_OFFSET r12,R12
	CFI_REL_OFFSET rbp,RBP
	CFI_REL_OFFSET rbx,RBX
	CFI_REL_OFFSET r11,R11
	CFI_REL_OFFSET r10,R10
	CFI_REL_OFFSET r9,R9
	CFI_REL_OFFSET r8,R8
	CFI_REL_OFFSET rax,RAX
	CFI_REL_OFFSET rcx,RCX
	CFI_REL_OFFSET rdx,RDX
	CFI_REL_OFFSET rsi,RSI
	CFI_REL_OFFSET rdi,RDI
	CFI_REL_OFFSET rip,RIP
	/*CFI_REL_OFFSET cs,CS*/
	/*CFI_REL_OFFSET rflags,EFLAGS*/
	CFI_REL_OFFSET rsp,RSP
	/*CFI_REL_OFFSET ss,SS*/
.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi: prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	push kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET 8
	popf # reset kernel eflags
	CFI_ADJUST_CFA_OFFSET -8
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
	je int_ret_from_sys_call
	testl $_TIF_IA32,TI_flags(%rcx)
	jnz int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
END(ret_from_fork)
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */
/*
 * Register setup:
 * rax	system call number
 * rdi	arg0
 * rcx	return address for syscall/sysret, C arg3
 * rsi	arg1
 * rdx	arg2
 * r10	arg3	(--> moved to rcx for C)
 * r8	arg4
 * r9	arg5
 * r11	eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the
 *	stack frame and report it properly in ps. Unfortunately we don't have one.
 *
 * When the user can change the frames, always force IRET. That is because
 * it deals with non-canonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
ENTRY(system_call)
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,PDA_STACKOFFSET
	CFI_REGISTER rip,rcx
	/*CFI_REGISTER rflags,r11*/
	SWAPGS_UNSAFE_STACK
	/*
	 * A hypervisor implementation might want to use a label
	 * after the swapgs, so that it can do the swapgs
	 * for the guest and jump here on syscall.
	 */
ENTRY(system_call_after_swapgs)
	movq %rsp,%gs:pda_oldrsp
	movq %gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_ARGS 8,1
	movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
	jnz tracesys
system_call_fastpath:
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8) # XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi: flagmask */
sysret_check:
	LOCKDEP_SYS_EXIT
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER rflags,r11*/
	movq %gs:pda_oldrsp, %rsp
	USERGS_SYSRET64

	CFI_RESTORE_STATE
	/* Handle reschedules */
	/* edx: work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
#ifdef CONFIG_AUDITSYSCALL
	bt $TIF_SYSCALL_AUDIT,%edx
	jc sysret_audit
#endif
	/* edx: work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
	xorl %esi,%esi # oldset -> arg2
	call ptregscall_common
	movl $_TIF_WORK_MASK,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call
#ifdef CONFIG_AUDITSYSCALL
	/*
	 * Fast path for syscall audit without full syscall trace.
	 * We just call audit_syscall_entry() directly, and then
	 * jump back to the normal fast path.
	 */
auditsys:
	movq %r10,%r9 /* 6th arg: 4th syscall arg */
	movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
	movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
	movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
	movq %rax,%rsi /* 2nd arg: syscall number */
	movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
	call audit_syscall_entry
	LOAD_ARGS 0 /* reload call-clobbered registers */
	jmp system_call_fastpath

	/*
	 * Return fast path for syscall audit. Call audit_syscall_exit()
	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
	 * masked off.
	 */
sysret_audit:
	movq %rax,%rsi /* second arg, syscall return value */
	cmpq $0,%rax /* is it < 0? */
	setl %al /* 1 if so, 0 if not */
	movzbl %al,%edi /* zero-extend that into %edi */
	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
	call audit_syscall_exit
	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
	jmp sysret_check
#endif /* CONFIG_AUDITSYSCALL */

	/* Do syscall tracing */
tracesys:
#ifdef CONFIG_AUDITSYSCALL
	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jz auditsys
#endif
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * We don't reload %rax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	LOAD_ARGS ARGOFFSET, 1
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
	movq %r10,%rcx /* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
	.globl int_ret_from_sys_call
	.globl int_with_check
int_ret_from_sys_call:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi: mask to check */
int_with_check:
	LOCKDEP_SYS_EXIT_IRQ
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz int_careful
	andl $~TS_COMPAT,TI_status(%rcx)
	jmp retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx: work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc int_very_careful
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	/* Check for syscall exit trace */
	testl $_TIF_WORK_SYSCALL_EXIT,%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi # &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
	jmp int_restore_rest

int_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz 1f
	movq %rsp,%rdi # &ptregs -> arg1
	xorl %esi,%esi # oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_WORK_MASK,%edi
int_restore_rest:
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(system_call)
/*
 * Certain special system calls need to save a complete full stack frame.
 */
.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq \func(%rip),%rax
	leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
	jmp ptregscall_common
END(\label)
.endm

	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
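	/* Common tail for the PTREGSCALL stubs (and sysret_signal): %rax holds
	   the C function to call, the pt_regs pointer is already in the proper
	   argument register, and the return address back into the syscall path
	   is on top of the stack (preserved in %r15 across the call). */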
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	movq %rsp, %rcx
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * initial frame state for interrupts and exceptions
 */
.macro _frame ref
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
.endm
/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */
/* 0(%rsp): interrupt number */
.macro interrupt func
	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
	pushq %rbp
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbp, 0
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	testl $3,CS(%rdi)
	je 1f
	SWAPGS
	/* irqcount is used to check if a CPU is already on an interrupt
	   stack or not. While this is essentially redundant with preempt_count
	   it is a little cheaper to use a separate counter in the PDA
	   (short of moving irq_enter into assembly, which would be too
	   much work) */
1:	incl %gs:pda_irqcount
	cmoveq %gs:pda_irqstackptr,%rsp
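	/* pda_irqcount starts at -1, so the incl above sets ZF only for the
	   outermost interrupt; the cmoveq then switches to the per-CPU
	   interrupt stack exactly once, and nested interrupts stay put. */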
	push %rbp # backlink for old unwinder
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	call \func
.endm

ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	decl %gs:pda_irqcount
	leaveq
	CFI_DEF_CFA_REGISTER rsp
	CFI_ADJUST_CFA_OFFSET -8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz retint_careful

retint_swapgs: /* return to user-space */
	/*
	 * The iretq could re-enable interrupts:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_IRETQ
	SWAPGS
	jmp restore_args

retint_restore_args: /* return to kernel space */
	DISABLE_INTERRUPTS(CLBR_ANY)
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0
irq_return:
	INTERRUPT_RETURN

	.section __ex_table, "a"
	.quad irq_return, bad_iret
	.previous

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iretq

	.section __ex_table,"a"
	.quad native_iret, bad_iret
	.previous
#endif

	.section .fixup,"ax"
bad_iret:
	/*
	 * The iret traps when the %cs or %ss being restored is bogus.
	 * We've lost the original trap vector and error code.
	 * #GPF is the most likely one to get for an invalid selector.
	 * So pretend we completed the iret and took the #GPF in user mode.
	 *
	 * We are now running with the kernel GS after exception recovery.
	 * But error_entry expects us to have user GS to match the user %cs,
	 * so swap back.
	 */
	pushq $0
	SWAPGS
	jmp general_protection
	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc retint_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp retint_check

retint_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz retint_swapgs
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi # oldset
	movq %rsp,%rdi # &pt_regs
	call do_notify_resume
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	jmp retint_with_reschedule

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx: threadinfo. interrupts off. */
ENTRY(retint_kernel)
	cmpl $0,TI_preempt_count(%rcx)
	jnz retint_restore_args
	bt $TIF_NEED_RESCHED,TI_flags(%rcx)
	jnc retint_restore_args
	bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
	jnc retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)
/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
	INTR_FRAME
	pushq $~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)

ENTRY(call_function_single_interrupt)
	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
END(call_function_single_interrupt)

ENTRY(irq_move_cleanup_interrupt)
	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
#endif

ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(uv_bau_message_intr1)
	apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)

/*
 * Exception entry points.
 */
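/*
 * zeroentry is used for exceptions where the CPU pushes no error code; a 0
 * is pushed by hand so the frame layout matches. errorentry is used when
 * the hardware has already pushed an error code.
 */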
.macro zeroentry sym
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0 /* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax /* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
.endm

.macro errorentry sym
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
.endm

/* error code is on the stack already */
/* handle NMI like exceptions that can happen everywhere */
.macro paranoidentry sym, ist=0, irqtrace=1
	SAVE_ALL
	cld
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
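	/* The high half of MSR_GS_BASE is negative when gs already points at
	   the kernel per-CPU area (a kernel address); only swapgs when we
	   interrupted code running with the user's gs, and note that in %ebx. */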
	testl %edx,%edx
	js 1f
	SWAPGS
	xorl %ebx,%ebx
1:
	.if \ist
	movq %gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
.endm

/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard flags at once, atomically)
 */
.macro paranoidexit trace=1
	/* ebx: no swapgs flag */
paranoid_exit\trace:
	testl %ebx,%ebx /* swapgs needed? */
	jnz paranoid_restore\trace
	testl $3,CS(%rsp)
	jnz paranoid_userspace\trace
paranoid_swapgs\trace:
	.if \trace
	TRACE_IRQS_IRETQ 0
	.endif
	SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
	RESTORE_ALL 8
	jmp irq_return
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs\trace
	movq %rsp,%rdi /* &pt_regs */
	call sync_regs
	movq %rax,%rsp /* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule\trace
	movl %ebx,%edx /* arg3: thread flags */
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi /* arg2: oldset */
	movq %rsp,%rdi /* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
paranoid_schedule\trace:
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
	CFI_ENDPROC
.endm
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
KPROBE_ENTRY(error_entry)
	_frame RDI
	CFI_REL_OFFSET rax,0
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET (14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET rsi,RSI
	movq 14*8(%rsp),%rsi /* load rax from rdi slot */
	CFI_REGISTER rax,rsi
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET rcx,RCX
	movq %rsi,10*8(%rsp) /* store rax */
	CFI_REL_OFFSET rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je error_kernelspace
error_swapgs:
	SWAPGS
error_sti:
	movq %rdi,RDI(%rsp)
	CFI_REL_OFFSET rdi,RDI
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi /* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful
	jmp retint_swapgs
	CFI_ENDPROC
error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B-stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq irq_return(%rip),%rcx
	cmpq %rcx,RIP(%rsp)
	je error_swapgs
	movl %ecx,%ecx /* zero extend */
	cmpq %rcx,RIP(%rsp)
	je error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je error_swapgs
	jmp error_sti
KPROBE_END(error_entry)
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(native_load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
	SWAPGS
gs_change:
	movl %edi,%gs
2:	mfence /* workaround */
	SWAPGS
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(native_load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous

	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	SWAPGS /* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp 2b
	.previous

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL
	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx
	xorl %r8d,%r8d
	xorl %r9d,%r9d
	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi
	/*
	 * It isn't worth checking for a reschedule here, so internally to the
	 * x86_64 port you can rely on kernel_thread() not rescheduling the
	 * child before returning; this avoids the need for hacks, for example
	 * to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)

child_rip:
	pushq $0 # fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	mov %eax, %edi
	call do_exit
	CFI_ENDPROC
ENDPROC(child_rip)
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	movq %rsp,%rcx
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_execve)

KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry math_state_restore
END(device_not_available)

/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	paranoidexit
KPROBE_END(debug)

/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
	paranoidexit 0
#else
	jmp paranoid_exit1
	CFI_ENDPROC
#endif
KPROBE_END(nmi)

KPROBE_ENTRY(int3)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit1
	CFI_ENDPROC
KPROBE_END(int3)

ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(bounds)
	zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit1
	CFI_ENDPROC
END(double_fault)
ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit1
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit1
	CFI_ENDPROC
END(machine_check)
#endif

/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	push %rbp
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbp,0
	mov %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl %gs:pda_irqcount
	cmove %gs:pda_irqstackptr,%rsp
	push %rbp # backlink for old unwinder
	call __do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER rsp
	CFI_ADJUST_CFA_OFFSET -8
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)
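/*
 * ignore_sysret is installed as the 32-bit SYSCALL entry (MSR_CSTAR) when
 * IA32 emulation is not configured, so a 32-bit "syscall" just returns
 * -ENOSYS instead of reaching the 64-bit entry point.
 */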
KPROBE_ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
	sysret
	CFI_ENDPROC
ENDPROC(ignore_sysret)

#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
	zeroentry xen_do_hypervisor_callback
END(xen_hypervisor_callback)

/*
 * A note on the "critical region" in our callback handler.
 * We want to avoid stacking callback handlers due to events occurring
 * during handling of the last event. To do this, we keep events disabled
 * until we've done all processing. HOWEVER, we must enable events before
 * popping the stack frame (can't be done atomically) and so it would still
 * be possible to get enough handler activations to overflow the stack.
 * Although unlikely, bugs of that kind are hard to track down, so we'd
 * like to avoid the possibility.
 * So, on entry to the handler we detect whether we interrupted an
 * existing activation in its critical region -- if so, we pop the current
 * activation and restart the handler using the previous one.
 */
ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct pt_regs *)
	CFI_STARTPROC
	/* Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *)
	   will see the correct pointer to the pt_regs */
	movq %rdi, %rsp # we don't return, adjust the stack frame
	CFI_ENDPROC
	CFI_DEFAULT_STACK
11:	incl %gs:pda_irqcount
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	cmovzq %gs:pda_irqstackptr,%rsp
	pushq %rbp # backlink for old unwinder
	call xen_evtchn_do_upcall
	popq %rsp
	CFI_DEF_CFA_REGISTER rsp
	decl %gs:pda_irqcount
	jmp error_exit
	CFI_ENDPROC
END(do_hypervisor_callback)
/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 *  1. Fault while reloading DS, ES, FS or GS
 *  2. Fault while executing IRET
 * Category 1 we do not need to fix up as Xen has already reloaded all segment
 * registers that could be reloaded and zeroed the others.
 * Category 2 we fix up by killing the current process. We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by comparing each saved segment register
 * with its current contents: any discrepancy means we are in category 1.
 */
ENTRY(xen_failsafe_callback)
framesz = (RIP-0x30) /* workaround buggy gas */
	_frame framesz
	CFI_REL_OFFSET rcx, 0
	CFI_REL_OFFSET r11, 8
	movw %ds,%cx
	cmpw %cx,0x10(%rsp)
	CFI_REMEMBER_STATE
	jne 1f
	movw %es,%cx
	cmpw %cx,0x18(%rsp)
	jne 1f
	movw %fs,%cx
	cmpw %cx,0x20(%rsp)
	jne 1f
	movw %gs,%cx
	cmpw %cx,0x28(%rsp)
	jne 1f
	/* All segments match their saved values => Category 2 (Bad IRET). */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rcx
	CFI_ADJUST_CFA_OFFSET 8
	jmp general_protection
	CFI_RESTORE_STATE
1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	SAVE_ALL
	jmp error_exit
	CFI_ENDPROC
END(xen_failsafe_callback)
#endif /* CONFIG_XEN */