entry_64.S

/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like a partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers
 *   are not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_64BIT 0x80000000
#define __AUDIT_ARCH_LE 0x40000000
.code64

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
retq
END(mcount)

ENTRY(ftrace_caller)
/* taken from glibc */
subq $0x38, %rsp
movq %rax, (%rsp)
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
movq %rsi, 24(%rsp)
movq %rdi, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
movq 0x38(%rsp), %rdi
movq 8(%rbp), %rsi
subq $MCOUNT_INSN_SIZE, %rdi
.globl ftrace_call
ftrace_call:
call ftrace_stub
movq 48(%rsp), %r9
movq 40(%rsp), %r8
movq 32(%rsp), %rdi
movq 24(%rsp), %rsi
movq 16(%rsp), %rdx
movq 8(%rsp), %rcx
movq (%rsp), %rax
addq $0x38, %rsp
.globl ftrace_stub
ftrace_stub:
retq
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
cmpq $ftrace_stub, ftrace_trace_function
jnz trace
.globl ftrace_stub
ftrace_stub:
retq
trace:
/* taken from glibc */
subq $0x38, %rsp
movq %rax, (%rsp)
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
movq %rsi, 24(%rsp)
movq %rdi, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
movq 0x38(%rsp), %rdi
movq 8(%rbp), %rsi
subq $MCOUNT_INSN_SIZE, %rdi
call *ftrace_trace_function
movq 48(%rsp), %r9
movq 40(%rsp), %r8
movq 32(%rsp), %rdi
movq 24(%rsp), %rsi
movq 16(%rsp), %rdx
movq 8(%rsp), %rcx
movq (%rsp), %rax
addq $0x38, %rsp
jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
swapgs
sysretq
#endif /* CONFIG_PARAVIRT */

.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
jnc 1f
TRACE_IRQS_ON
1:
#endif
.endm
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
movq %gs:pda_oldrsp,\tmp
movq \tmp,RSP(%rsp)
movq $__USER_DS,SS(%rsp)
movq $__USER_CS,CS(%rsp)
movq $-1,RCX(%rsp)
movq R11(%rsp),\tmp /* get eflags */
movq \tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
movq RSP-\offset(%rsp),\tmp
movq \tmp,%gs:pda_oldrsp
movq EFLAGS-\offset(%rsp),\tmp
movq \tmp,R11-\offset(%rsp)
.endm

.macro FAKE_STACK_FRAME child_rip
/* push in order ss, rsp, eflags, cs, rip */
xorl %eax, %eax
pushq $__KERNEL_DS /* ss */
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET ss,0*/
pushq %rax /* rsp */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rsp,0
pushq $(1<<9) /* eflags - interrupts on */
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET rflags,0*/
pushq $__KERNEL_CS /* cs */
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET cs,0*/
pushq \child_rip /* rip */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rip,0
pushq %rax /* orig rax */
CFI_ADJUST_CFA_OFFSET 8
.endm

.macro UNFAKE_STACK_FRAME
addq $8*6, %rsp
CFI_ADJUST_CFA_OFFSET -(6*8)
.endm

.macro CFI_DEFAULT_STACK start=1
.if \start
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8
.else
CFI_DEF_CFA_OFFSET SS+8
.endif
CFI_REL_OFFSET r15,R15
CFI_REL_OFFSET r14,R14
CFI_REL_OFFSET r13,R13
CFI_REL_OFFSET r12,R12
CFI_REL_OFFSET rbp,RBP
CFI_REL_OFFSET rbx,RBX
CFI_REL_OFFSET r11,R11
CFI_REL_OFFSET r10,R10
CFI_REL_OFFSET r9,R9
CFI_REL_OFFSET r8,R8
CFI_REL_OFFSET rax,RAX
CFI_REL_OFFSET rcx,RCX
CFI_REL_OFFSET rdx,RDX
CFI_REL_OFFSET rsi,RSI
CFI_REL_OFFSET rdi,RDI
CFI_REL_OFFSET rip,RIP
/*CFI_REL_OFFSET cs,CS*/
/*CFI_REL_OFFSET rflags,EFLAGS*/
CFI_REL_OFFSET rsp,RSP
/*CFI_REL_OFFSET ss,SS*/
.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi: prev */
ENTRY(ret_from_fork)
CFI_DEFAULT_STACK
push kernel_eflags(%rip)
CFI_ADJUST_CFA_OFFSET 8
popf # reset kernel eflags
CFI_ADJUST_CFA_OFFSET -8
call schedule_tail
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
jnz rff_trace
rff_action:
RESTORE_REST
testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
je int_ret_from_sys_call
testl $_TIF_IA32,TI_flags(%rcx)
jnz int_ret_from_sys_call
RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
jmp ret_from_sys_call
rff_trace:
movq %rsp,%rdi
call syscall_trace_leave
GET_THREAD_INFO(%rcx)
jmp rff_action
CFI_ENDPROC
END(ret_from_fork)
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3 (--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX if we had a free scratch register we could save the RSP into the stack
 * frame and report it properly in ps. Unfortunately we don't have one.
 *
 * When the user can change the frames, always force IRET. That is because
 * it deals with uncanonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
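
/*
 * For reference, a minimal user-space sketch of the calling convention
 * described above (illustrative only, not part of this file; the number 39
 * is __NR_getpid on x86-64 and is an assumption about the target unistd
 * layout):
 *
 *	movl	$39, %eax		# syscall number in rax (__NR_getpid)
 *	# up to six arguments would go in rdi, rsi, rdx, r10, r8, r9
 *	syscall				# rcx <- return RIP, r11 <- rflags
 *	# on return, rax holds the result or a negative errno;
 *	# rcx and r11 are clobbered, exactly as noted in the table above
 */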
ENTRY(system_call)
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,PDA_STACKOFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
/*
 * A hypervisor implementation might want to use a label
 * after the swapgs, so that it can do the swapgs
 * for the guest and jump here on syscall.
 */
ENTRY(system_call_after_swapgs)
movq %rsp,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
/*
 * No need to follow this irqs off/on section - it's straight
 * and short:
 */
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8,1
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
jnz tracesys
system_call_fastpath:
cmpq $__NR_syscall_max,%rax
ja badsys
movq %r10,%rcx
call *sys_call_table(,%rax,8) # XXX: rip relative
movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
movl $_TIF_ALLWORK_MASK,%edi
/* edi: flagmask */
sysret_check:
LOCKDEP_SYS_EXIT
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
movl TI_flags(%rcx),%edx
andl %edi,%edx
jnz sysret_careful
CFI_REMEMBER_STATE
/*
 * sysretq will re-enable interrupts:
 */
TRACE_IRQS_ON
movq RIP-ARGOFFSET(%rsp),%rcx
CFI_REGISTER rip,rcx
RESTORE_ARGS 0,-ARG_SKIP,1
/*CFI_REGISTER rflags,r11*/
movq %gs:pda_oldrsp, %rsp
USERGS_SYSRET64
CFI_RESTORE_STATE
/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
jmp sysret_check
/* Handle a signal */
sysret_signal:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
#ifdef CONFIG_AUDITSYSCALL
bt $TIF_SYSCALL_AUDIT,%edx
jc sysret_audit
#endif
/* edx: work flags (arg3) */
leaq do_notify_resume(%rip),%rax
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
xorl %esi,%esi # oldset -> arg2
call ptregscall_common
movl $_TIF_WORK_MASK,%edi
/* Use IRET because user could have changed frame. This
   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
jmp ret_from_sys_call
#ifdef CONFIG_AUDITSYSCALL
/*
 * Fast path for syscall audit without full syscall trace.
 * We just call audit_syscall_entry() directly, and then
 * jump back to the normal fast path.
 */
auditsys:
movq %r10,%r9 /* 6th arg: 4th syscall arg */
movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
movq %rax,%rsi /* 2nd arg: syscall number */
movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
call audit_syscall_entry
LOAD_ARGS 0 /* reload call-clobbered registers */
jmp system_call_fastpath
/*
 * Return fast path for syscall audit. Call audit_syscall_exit()
 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
 * masked off.
 */
sysret_audit:
movq %rax,%rsi /* second arg, syscall return value */
cmpq $0,%rax /* is it < 0? */
setl %al /* 1 if so, 0 if not */
movzbl %al,%edi /* zero-extend that into %edi */
inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
call audit_syscall_exit
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
jmp sysret_check
#endif /* CONFIG_AUDITSYSCALL */
/* Do syscall tracing */
tracesys:
#ifdef CONFIG_AUDITSYSCALL
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
jz auditsys
#endif
SAVE_REST
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
FIXUP_TOP_OF_STACK %rdi
movq %rsp,%rdi
call syscall_trace_enter
/*
 * Reload arg registers from stack in case ptrace changed them.
 * We don't reload %rax because syscall_trace_enter() returned
 * the value it wants us to use in the table lookup.
 */
LOAD_ARGS ARGOFFSET, 1
RESTORE_REST
cmpq $__NR_syscall_max,%rax
ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
movq %r10,%rcx /* fixup for C */
call *sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
/* Use IRET because user could have changed frame */
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
.globl int_ret_from_sys_call
.globl int_with_check
int_ret_from_sys_call:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $3,CS-ARGOFFSET(%rsp)
je retint_restore_args
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
int_with_check:
LOCKDEP_SYS_EXIT_IRQ
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%edx
andl %edi,%edx
jnz int_careful
andl $~TS_COMPAT,TI_status(%rcx)
jmp retint_swapgs
/* Either reschedule or signal or syscall exit tracking needed. */
/* First do a reschedule test. */
/* edx: work, edi: workmask */
int_careful:
bt $TIF_NEED_RESCHED,%edx
jnc int_very_careful
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
/* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
jz int_signal
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace_leave
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
jmp int_restore_rest
int_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz 1f
movq %rsp,%rdi # &ptregs -> arg1
xorl %esi,%esi # oldset -> arg2
call do_notify_resume
1: movl $_TIF_WORK_MASK,%edi
int_restore_rest:
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
CFI_ENDPROC
END(system_call)
/*
 * Certain special system calls that need to save a complete full stack frame.
 */
.macro PTREGSCALL label,func,arg
.globl \label
\label:
leaq \func(%rip),%rax
leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
jmp ptregscall_common
END(\label)
.endm

CFI_STARTPROC
PTREGSCALL stub_clone, sys_clone, %r8
PTREGSCALL stub_fork, sys_fork, %rdi
PTREGSCALL stub_vfork, sys_vfork, %rdi
PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
popq %r11
CFI_ADJUST_CFA_OFFSET -8
CFI_REGISTER rip, r11
SAVE_REST
movq %r11, %r15
CFI_REGISTER rip, r15
FIXUP_TOP_OF_STACK %r11
call *%rax
RESTORE_TOP_OF_STACK %r11
movq %r15, %r11
CFI_REGISTER rip, r11
RESTORE_REST
pushq %r11
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rip, 0
ret
CFI_ENDPROC
END(ptregscall_common)

ENTRY(stub_execve)
CFI_STARTPROC
popq %r11
CFI_ADJUST_CFA_OFFSET -8
CFI_REGISTER rip, r11
SAVE_REST
FIXUP_TOP_OF_STACK %r11
movq %rsp, %rcx
call sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_REST
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
CFI_ADJUST_CFA_OFFSET -8
SAVE_REST
movq %rsp,%rdi
FIXUP_TOP_OF_STACK %r11
call sys_rt_sigreturn
movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
RESTORE_REST
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_rt_sigreturn)
/*
 * initial frame state for interrupts and exceptions
 */
.macro _frame ref
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-\ref
/*CFI_REL_OFFSET ss,SS-\ref*/
CFI_REL_OFFSET rsp,RSP-\ref
/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
/*CFI_REL_OFFSET cs,CS-\ref*/
CFI_REL_OFFSET rip,RIP-\ref
.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
.macro interrupt func
cld
SAVE_ARGS
leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
pushq %rbp
/*
 * Save rbp twice: One is for marking the stack frame, as usual, and the
 * other, to fill pt_regs properly. This is because bx comes right
 * before the last saved register in that structure, and not bp. If the
 * base pointer were in the place bx is today, this would not be needed.
 */
movq %rbp, -8(%rsp)
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rbp, 0
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
testl $3,CS(%rdi)
je 1f
SWAPGS
/* irqcount is used to check if a CPU is already on an interrupt
   stack or not. While this is essentially redundant with preempt_count
   it is a little cheaper to use a separate counter in the PDA
   (short of moving irq_enter into assembly, which would be too
   much work) */
1: incl %gs:pda_irqcount
cmoveq %gs:pda_irqstackptr,%rsp
push %rbp # backlink for old unwinder
/*
 * We entered an interrupt context - irqs are off:
 */
TRACE_IRQS_OFF
call \func
.endm
ENTRY(common_interrupt)
XCPT_FRAME
interrupt do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
decl %gs:pda_irqcount
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
exit_intr:
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
je retint_kernel
/* Interrupt came from user space */
/*
 * Has a correct top of stack, but a partial stack frame
 * %rcx: thread info. Interrupts off.
 */
retint_with_reschedule:
movl $_TIF_WORK_MASK,%edi
retint_check:
LOCKDEP_SYS_EXIT_IRQ
movl TI_flags(%rcx),%edx
andl %edi,%edx
CFI_REMEMBER_STATE
jnz retint_careful
retint_swapgs: /* return to user-space */
/*
 * The iretq could re-enable interrupts:
 */
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_IRETQ
SWAPGS
jmp restore_args
retint_restore_args: /* return to kernel space */
DISABLE_INTERRUPTS(CLBR_ANY)
/*
 * The iretq could re-enable interrupts:
 */
TRACE_IRQS_IRETQ
restore_args:
RESTORE_ARGS 0,8,0
irq_return:
INTERRUPT_RETURN
.section __ex_table, "a"
.quad irq_return, bad_iret
.previous
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
iretq
.section __ex_table,"a"
.quad native_iret, bad_iret
.previous
#endif
.section .fixup,"ax"
bad_iret:
/*
 * The iret traps when the %cs or %ss being restored is bogus.
 * We've lost the original trap vector and error code.
 * #GPF is the most likely one to get for an invalid selector.
 * So pretend we completed the iret and took the #GPF in user mode.
 *
 * We are now running with the kernel GS after exception recovery.
 * But error_entry expects us to have user GS to match the user %cs,
 * so swap back.
 */
pushq $0
SWAPGS
jmp general_protection
.previous
/* edi: workmask, edx: work */
retint_careful:
CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc retint_signal
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp retint_check
retint_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz retint_swapgs
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
movq $-1,ORIG_RAX(%rsp)
xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
jmp retint_with_reschedule
#ifdef CONFIG_PREEMPT
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
ENTRY(retint_kernel)
cmpl $0,TI_preempt_count(%rcx)
jnz retint_restore_args
bt $TIF_NEED_RESCHED,TI_flags(%rcx)
jnc retint_restore_args
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
jnc retint_restore_args
call preempt_schedule_irq
jmp exit_intr
#endif
CFI_ENDPROC
END(common_interrupt)
/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
INTR_FRAME
pushq $~(\num)
CFI_ADJUST_CFA_OFFSET 8
interrupt \func
jmp ret_from_intr
CFI_ENDPROC
.endm

ENTRY(thermal_interrupt)
apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
.endm

INVALIDATE_ENTRY 0
INVALIDATE_ENTRY 1
INVALIDATE_ENTRY 2
INVALIDATE_ENTRY 3
INVALIDATE_ENTRY 4
INVALIDATE_ENTRY 5
INVALIDATE_ENTRY 6
INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
ENTRY(call_function_single_interrupt)
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
END(call_function_single_interrupt)
ENTRY(irq_move_cleanup_interrupt)
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
#endif

ENTRY(apic_timer_interrupt)
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)
ENTRY(uv_bau_message_intr1)
apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)
ENTRY(error_interrupt)
apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)
ENTRY(spurious_interrupt)
apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
/*
 * Exception entry points.
 */
.macro zeroentry sym
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0 /* push error code/oldrax */
CFI_ADJUST_CFA_OFFSET 8
pushq %rax /* push real oldrax to the rdi slot */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rax,0
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
.endm

.macro errorentry sym
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq %rax
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rax,0
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
.endm

/* error code is on the stack already */
/* handle NMI like exceptions that can happen everywhere */
.macro paranoidentry sym, ist=0, irqtrace=1
SAVE_ALL
cld
movl $1,%ebx
movl $MSR_GS_BASE,%ecx
rdmsr
testl %edx,%edx
js 1f
SWAPGS
xorl %ebx,%ebx
1:
.if \ist
movq %gs:pda_data_offset, %rbp
.endif
.if \irqtrace
TRACE_IRQS_OFF
.endif
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi
movq $-1,ORIG_RAX(%rsp)
.if \ist
subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
call \sym
.if \ist
addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
DISABLE_INTERRUPTS(CLBR_NONE)
.if \irqtrace
TRACE_IRQS_OFF
.endif
.endm
/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard flags at once, atomically)
 */
.macro paranoidexit trace=1
/* ebx: no swapgs flag */
paranoid_exit\trace:
testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore\trace
testl $3,CS(%rsp)
jnz paranoid_userspace\trace
paranoid_swapgs\trace:
.if \trace
TRACE_IRQS_IRETQ 0
.endif
SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
RESTORE_ALL 8
jmp irq_return
paranoid_userspace\trace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
jz paranoid_swapgs\trace
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */
testl $_TIF_NEED_RESCHED,%ebx
jnz paranoid_schedule\trace
movl %ebx,%edx /* arg3: thread flags */
.if \trace
TRACE_IRQS_ON
.endif
ENABLE_INTERRUPTS(CLBR_NONE)
xorl %esi,%esi /* arg2: oldset */
movq %rsp,%rdi /* arg1: &pt_regs */
call do_notify_resume
DISABLE_INTERRUPTS(CLBR_NONE)
.if \trace
TRACE_IRQS_OFF
.endif
jmp paranoid_userspace\trace
paranoid_schedule\trace:
.if \trace
TRACE_IRQS_ON
.endif
ENABLE_INTERRUPTS(CLBR_ANY)
call schedule
DISABLE_INTERRUPTS(CLBR_ANY)
.if \trace
TRACE_IRQS_OFF
.endif
jmp paranoid_userspace\trace
CFI_ENDPROC
.endm
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
KPROBE_ENTRY(error_entry)
_frame RDI
CFI_REL_OFFSET rax,0
/* rdi slot contains rax, oldrax contains error code */
cld
subq $14*8,%rsp
CFI_ADJUST_CFA_OFFSET (14*8)
movq %rsi,13*8(%rsp)
CFI_REL_OFFSET rsi,RSI
movq 14*8(%rsp),%rsi /* load rax from rdi slot */
CFI_REGISTER rax,rsi
movq %rdx,12*8(%rsp)
CFI_REL_OFFSET rdx,RDX
movq %rcx,11*8(%rsp)
CFI_REL_OFFSET rcx,RCX
movq %rsi,10*8(%rsp) /* store rax */
CFI_REL_OFFSET rax,RAX
movq %r8, 9*8(%rsp)
CFI_REL_OFFSET r8,R8
movq %r9, 8*8(%rsp)
CFI_REL_OFFSET r9,R9
movq %r10,7*8(%rsp)
CFI_REL_OFFSET r10,R10
movq %r11,6*8(%rsp)
CFI_REL_OFFSET r11,R11
movq %rbx,5*8(%rsp)
CFI_REL_OFFSET rbx,RBX
movq %rbp,4*8(%rsp)
CFI_REL_OFFSET rbp,RBP
movq %r12,3*8(%rsp)
CFI_REL_OFFSET r12,R12
movq %r13,2*8(%rsp)
CFI_REL_OFFSET r13,R13
movq %r14,1*8(%rsp)
CFI_REL_OFFSET r14,R14
movq %r15,(%rsp)
CFI_REL_OFFSET r15,R15
xorl %ebx,%ebx
testl $3,CS(%rsp)
je error_kernelspace
error_swapgs:
SWAPGS
error_sti:
TRACE_IRQS_OFF
movq %rdi,RDI(%rsp)
CFI_REL_OFFSET rdi,RDI
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp)
call *%rax
/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
movl %ebx,%eax
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
testl %eax,%eax
jne retint_kernel
LOCKDEP_SYS_EXIT_IRQ
movl TI_flags(%rcx),%edx
movl $_TIF_WORK_MASK,%edi
andl %edi,%edx
jnz retint_careful
jmp retint_swapgs
CFI_ENDPROC
error_kernelspace:
incl %ebx
/* There are two places in the kernel that can potentially fault with
   usergs. Handle them here. The exception handlers after
   iret run with kernel gs again, so don't set the user space flag.
   B-stepping K8s sometimes report a truncated RIP for IRET
   exceptions returning to compat mode. Check for these here too. */
leaq irq_return(%rip),%rcx
cmpq %rcx,RIP(%rsp)
je error_swapgs
movl %ecx,%ecx /* zero extend */
cmpq %rcx,RIP(%rsp)
je error_swapgs
cmpq $gs_change,RIP(%rsp)
je error_swapgs
jmp error_sti
KPROBE_END(error_entry)
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(native_load_gs_index)
CFI_STARTPROC
pushf
CFI_ADJUST_CFA_OFFSET 8
DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
SWAPGS
gs_change:
movl %edi,%gs
2: mfence /* workaround */
SWAPGS
popf
CFI_ADJUST_CFA_OFFSET -8
ret
CFI_ENDPROC
ENDPROC(native_load_gs_index)

.section __ex_table,"a"
.align 8
.quad gs_change,bad_gs
.previous
.section .fixup,"ax"
/* running with kernelgs */
bad_gs:
SWAPGS /* switch back to user gs */
xorl %eax,%eax
movl %eax,%gs
jmp 2b
.previous
/*
 * Create a kernel thread.
 *
 * C extern interface:
 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 * rdi: fn, rsi: arg, rdx: flags
 */
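/*
 * A caller-side sketch of the C interface above (illustrative only; the
 * worker function and the flag combination are assumptions about a typical
 * in-kernel user, not taken from this file):
 *
 *	static int worker(void *arg) { ... return 0; }
 *	...
 *	pid = kernel_thread(worker, NULL, CLONE_FS | CLONE_FILES | SIGCHLD);
 *
 * The new thread starts in child_rip below, which calls fn(arg) and then
 * passes the return value to do_exit().
 */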
ENTRY(kernel_thread)
CFI_STARTPROC
FAKE_STACK_FRAME $child_rip
SAVE_ALL
# rdi: flags, rsi: usp, rdx: will be &pt_regs
movq %rdx,%rdi
orq kernel_thread_flags(%rip),%rdi
movq $-1, %rsi
movq %rsp, %rdx
xorl %r8d,%r8d
xorl %r9d,%r9d
# clone now
call do_fork
movq %rax,RAX(%rsp)
xorl %edi,%edi
/*
 * It isn't worth checking for a reschedule here,
 * so internally to the x86_64 port you can rely on kernel_thread()
 * not to reschedule the child before returning; this avoids the need
 * for hacks, for example to fork off the per-CPU idle tasks.
 * [Hopefully no generic code relies on the reschedule -AK]
 */
RESTORE_ALL
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
ENDPROC(kernel_thread)

child_rip:
pushq $0 # fake return address
CFI_STARTPROC
/*
 * Here we are in the child and the registers are set as they were
 * at kernel_thread() invocation in the parent.
 */
movq %rdi, %rax
movq %rsi, %rdi
call *%rax
# exit
mov %eax, %edi
call do_exit
CFI_ENDPROC
ENDPROC(child_rip)
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 * extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 * rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 * extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
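/*
 * A caller-side sketch of this helper (illustrative only; argv_init and
 * envp_init are placeholder names for the caller's argument arrays, not
 * defined in this file):
 *
 *	static char *argv_init[] = { "/sbin/init", NULL };
 *	...
 *	ret = kernel_execve("/sbin/init", argv_init, envp_init);
 *
 * A zero return means the exec succeeded and the code below leaves through
 * int_ret_from_sys_call; a nonzero return is the error from sys_execve.
 */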
ENTRY(kernel_execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
SAVE_ALL
movq %rsp,%rcx
call sys_execve
movq %rax, RAX(%rsp)
RESTORE_REST
testq %rax,%rax
je int_ret_from_sys_call
RESTORE_ARGS
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
ENDPROC(kernel_execve)
KPROBE_ENTRY(page_fault)
errorentry do_page_fault
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
zeroentry do_device_not_available
END(device_not_available)

/* runs on exception stack */
KPROBE_ENTRY(debug)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug, DEBUG_STACK
paranoidexit
KPROBE_END(debug)

/* runs on exception stack */
KPROBE_ENTRY(nmi)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $-1
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
paranoidexit 0
#else
jmp paranoid_exit1
CFI_ENDPROC
#endif
KPROBE_END(nmi)

KPROBE_ENTRY(int3)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_int3, DEBUG_STACK
jmp paranoid_exit1
CFI_ENDPROC
KPROBE_END(int3)

ENTRY(overflow)
zeroentry do_overflow
END(overflow)

ENTRY(bounds)
zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

/* runs on exception stack */
ENTRY(double_fault)
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
paranoidentry do_double_fault
jmp paranoid_exit1
CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
errorentry do_segment_not_present
END(segment_not_present)

/* runs on exception stack */
ENTRY(stack_segment)
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
paranoidentry do_stack_segment
jmp paranoid_exit1
CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
/* runs on exception stack */
ENTRY(machine_check)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_machine_check
jmp paranoid_exit1
CFI_ENDPROC
END(machine_check)
#endif
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
CFI_STARTPROC
push %rbp
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rbp,0
mov %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
incl %gs:pda_irqcount
cmove %gs:pda_irqstackptr,%rsp
push %rbp # backlink for old unwinder
call __do_softirq
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
decl %gs:pda_irqcount
ret
CFI_ENDPROC
ENDPROC(call_softirq)

KPROBE_ENTRY(ignore_sysret)
CFI_STARTPROC
mov $-ENOSYS,%eax
sysret
CFI_ENDPROC
ENDPROC(ignore_sysret)
#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
zeroentry xen_do_hypervisor_callback
END(xen_hypervisor_callback)

/*
 * A note on the "critical region" in our callback handler.
 * We want to avoid stacking callback handlers due to events occurring
 * during handling of the last event. To do this, we keep events disabled
 * until we've done all processing. HOWEVER, we must enable events before
 * popping the stack frame (can't be done atomically) and so it would still
 * be possible to get enough handler activations to overflow the stack.
 * Although unlikely, bugs of that kind are hard to track down, so we'd
 * like to avoid the possibility.
 * So, on entry to the handler we detect whether we interrupted an
 * existing activation in its critical region -- if so, we pop the current
 * activation and restart the handler using the previous one.
 */
ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
CFI_STARTPROC
/* Since we don't modify %rdi, evtchn_do_upcall(struct *pt_regs) will
   see the correct pointer to the pt_regs */
movq %rdi, %rsp # we don't return, adjust the stack frame
CFI_ENDPROC
CFI_DEFAULT_STACK
11: incl %gs:pda_irqcount
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
cmovzq %gs:pda_irqstackptr,%rsp
pushq %rbp # backlink for old unwinder
call xen_evtchn_do_upcall
popq %rsp
CFI_DEF_CFA_REGISTER rsp
decl %gs:pda_irqcount
jmp error_exit
CFI_ENDPROC
END(do_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 * 1. Fault while reloading DS, ES, FS or GS
 * 2. Fault while executing IRET
 * Category 1 we do not need to fix up as Xen has already reloaded all segment
 * registers that could be reloaded and zeroed the others.
 * Category 2 we fix up by killing the current process. We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by comparing each saved segment register
 * with its current contents: any discrepancy means we are in category 1.
 */
ENTRY(xen_failsafe_callback)
framesz = (RIP-0x30) /* workaround buggy gas */
_frame framesz
CFI_REL_OFFSET rcx, 0
CFI_REL_OFFSET r11, 8
movw %ds,%cx
cmpw %cx,0x10(%rsp)
CFI_REMEMBER_STATE
jne 1f
movw %es,%cx
cmpw %cx,0x18(%rsp)
jne 1f
movw %fs,%cx
cmpw %cx,0x20(%rsp)
jne 1f
movw %gs,%cx
cmpw %cx,0x28(%rsp)
jne 1f
/* All segments match their saved values => Category 2 (Bad IRET). */
movq (%rsp),%rcx
CFI_RESTORE rcx
movq 8(%rsp),%r11
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
pushq $0
CFI_ADJUST_CFA_OFFSET 8
pushq %r11
CFI_ADJUST_CFA_OFFSET 8
pushq %rcx
CFI_ADJUST_CFA_OFFSET 8
jmp general_protection
CFI_RESTORE_STATE
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
movq (%rsp),%rcx
CFI_RESTORE rcx
movq 8(%rsp),%r11
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
pushq $0
CFI_ADJUST_CFA_OFFSET 8
SAVE_ALL
jmp error_exit
CFI_ENDPROC
END(xen_failsafe_callback)
#endif /* CONFIG_XEN */