  1. /*
  2. * linux/arch/x86_64/entry.S
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
  6. * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
  7. */
  8. /*
  9. * entry.S contains the system-call and fault low-level handling routines.
  10. *
  11. * NOTE: This code handles signal recognition, which happens after every
  12. * interrupt and after each system call.
  13. *
  14. * Normal syscalls and interrupts don't save a full stack frame; this is
  15. * only done for syscall tracing, signals, or fork/exec et al.
  16. *
  17. * A note on terminology:
  18. * - top of stack: Architecture defined interrupt frame from SS to RIP
  19. * at the top of the kernel process stack.
  20. * - partial stack frame: partially saved registers up to R11.
  21. * - full stack frame: Like partial stack frame, but all registers saved.
  22. *
  23. * Some macro usage:
  24. * - CFI macros are used to generate dwarf2 unwind information for better
  25. * backtraces. They don't change any code.
  26. * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
  27. * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
  28. * There are unfortunately lots of special cases where some registers are
  29. * not touched. The macro is a big mess that should be cleaned up.
  30. * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
  31. * Gives a full stack frame.
  32. * - ENTRY/END - Define functions in the symbol table.
  33. * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  34. * frame that is otherwise undefined after a SYSCALL
  35. * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
  36. * - errorentry/paranoidentry/zeroentry - Define exception entry points.
  37. */
  38. #include <linux/linkage.h>
  39. #include <asm/segment.h>
  40. #include <asm/cache.h>
  41. #include <asm/errno.h>
  42. #include <asm/dwarf2.h>
  43. #include <asm/calling.h>
  44. #include <asm/asm-offsets.h>
  45. #include <asm/msr.h>
  46. #include <asm/unistd.h>
  47. #include <asm/thread_info.h>
  48. #include <asm/hw_irq.h>
  49. #include <asm/page.h>
  50. #include <asm/irqflags.h>
  51. #include <asm/paravirt.h>
  52. #include <asm/ftrace.h>
  53. /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
  54. #include <linux/elf-em.h>
  55. #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
  56. #define __AUDIT_ARCH_64BIT 0x80000000
  57. #define __AUDIT_ARCH_LE 0x40000000
  58. .code64
  59. #ifdef CONFIG_FUNCTION_TRACER
  60. #ifdef CONFIG_DYNAMIC_FTRACE
  61. ENTRY(mcount)
  62. retq
  63. END(mcount)
  64. ENTRY(ftrace_caller)
  65. cmpl $0, function_trace_stop
  66. jne ftrace_stub
  67. MCOUNT_SAVE_FRAME
  68. movq 0x38(%rsp), %rdi
  69. movq 8(%rbp), %rsi
  70. subq $MCOUNT_INSN_SIZE, %rdi
  71. .globl ftrace_call
  72. ftrace_call:
  73. call ftrace_stub
  74. MCOUNT_RESTORE_FRAME
  75. #ifdef CONFIG_FUNCTION_GRAPH_TRACER
  76. .globl ftrace_graph_call
  77. ftrace_graph_call:
  78. jmp ftrace_stub
  79. #endif
  80. .globl ftrace_stub
  81. ftrace_stub:
  82. retq
  83. END(ftrace_caller)
  84. #else /* ! CONFIG_DYNAMIC_FTRACE */
  85. ENTRY(mcount)
  86. cmpl $0, function_trace_stop
  87. jne ftrace_stub
  88. cmpq $ftrace_stub, ftrace_trace_function
  89. jnz trace
  90. #ifdef CONFIG_FUNCTION_GRAPH_TRACER
  91. cmpq $ftrace_stub, ftrace_graph_return
  92. jnz ftrace_graph_caller
  93. cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
  94. jnz ftrace_graph_caller
  95. #endif
  96. .globl ftrace_stub
  97. ftrace_stub:
  98. retq
  99. trace:
  100. MCOUNT_SAVE_FRAME
  101. movq 0x38(%rsp), %rdi
  102. movq 8(%rbp), %rsi
  103. subq $MCOUNT_INSN_SIZE, %rdi
  104. call *ftrace_trace_function
  105. MCOUNT_RESTORE_FRAME
  106. jmp ftrace_stub
  107. END(mcount)
  108. #endif /* CONFIG_DYNAMIC_FTRACE */
  109. #endif /* CONFIG_FUNCTION_TRACER */
  110. #ifdef CONFIG_FUNCTION_GRAPH_TRACER
  111. ENTRY(ftrace_graph_caller)
  112. cmpl $0, function_trace_stop
  113. jne ftrace_stub
  114. MCOUNT_SAVE_FRAME
  115. leaq 8(%rbp), %rdi
  116. movq 0x38(%rsp), %rsi
  117. subq $MCOUNT_INSN_SIZE, %rsi
  118. call prepare_ftrace_return
  119. MCOUNT_RESTORE_FRAME
  120. retq
  121. END(ftrace_graph_caller)
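/*
 * return_to_handler is the trampoline the function graph tracer installs
 * as a fake return address. It preserves the potential return-value and
 * argument registers, asks ftrace_return_to_handler() for the original
 * return address, and returns through the slot it leaves on the stack.
 */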
  122. .globl return_to_handler
  123. return_to_handler:
  124. subq $80, %rsp
  125. movq %rax, (%rsp)
  126. movq %rcx, 8(%rsp)
  127. movq %rdx, 16(%rsp)
  128. movq %rsi, 24(%rsp)
  129. movq %rdi, 32(%rsp)
  130. movq %r8, 40(%rsp)
  131. movq %r9, 48(%rsp)
  132. movq %r10, 56(%rsp)
  133. movq %r11, 64(%rsp)
  134. call ftrace_return_to_handler
  135. movq %rax, 72(%rsp)
  136. movq 64(%rsp), %r11
  137. movq 56(%rsp), %r10
  138. movq 48(%rsp), %r9
  139. movq 40(%rsp), %r8
  140. movq 32(%rsp), %rdi
  141. movq 24(%rsp), %rsi
  142. movq 16(%rsp), %rdx
  143. movq 8(%rsp), %rcx
  144. movq (%rsp), %rax
  145. addq $72, %rsp
  146. retq
  147. #endif
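/*
 * Without CONFIG_PREEMPT a return to kernel mode never reschedules, so
 * retint_kernel can simply alias the plain register-restore path.
 */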
  148. #ifndef CONFIG_PREEMPT
  149. #define retint_kernel retint_restore_args
  150. #endif
  151. #ifdef CONFIG_PARAVIRT
  152. ENTRY(native_usergs_sysret64)
  153. swapgs
  154. sysretq
  155. #endif /* CONFIG_PARAVIRT */
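/*
 * TRACE_IRQS_IRETQ: if IF is set in the saved EFLAGS, the coming iretq
 * will re-enable interrupts, so tell the irq-flag tracing code up front.
 */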
  156. .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
  157. #ifdef CONFIG_TRACE_IRQFLAGS
  158. bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
  159. jnc 1f
  160. TRACE_IRQS_ON
  161. 1:
  162. #endif
  163. .endm
  164. /*
  165. * C code is not supposed to know about the undefined top of stack. Every time
  166. * a C function with a pt_regs argument is called from the SYSCALL-based
  167. * fast path, FIXUP_TOP_OF_STACK is needed.
  168. * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
  169. * manipulation.
  170. */
  171. /* %rsp:at FRAMEEND */
  172. .macro FIXUP_TOP_OF_STACK tmp
  173. movq %gs:pda_oldrsp,\tmp
  174. movq \tmp,RSP(%rsp)
  175. movq $__USER_DS,SS(%rsp)
  176. movq $__USER_CS,CS(%rsp)
  177. movq $-1,RCX(%rsp)
  178. movq R11(%rsp),\tmp /* get eflags */
  179. movq \tmp,EFLAGS(%rsp)
  180. .endm
  181. .macro RESTORE_TOP_OF_STACK tmp,offset=0
  182. movq RSP-\offset(%rsp),\tmp
  183. movq \tmp,%gs:pda_oldrsp
  184. movq EFLAGS-\offset(%rsp),\tmp
  185. movq \tmp,R11-\offset(%rsp)
  186. .endm
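/*
 * FAKE_STACK_FRAME/UNFAKE_STACK_FRAME build and tear down a minimal
 * interrupt-style frame (ss, rsp, eflags, cs, rip, orig_rax) so a kernel
 * thread can be entered and left through the normal return paths.
 */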
  187. .macro FAKE_STACK_FRAME child_rip
  188. /* push in order ss, rsp, eflags, cs, rip */
  189. xorl %eax, %eax
  190. pushq $__KERNEL_DS /* ss */
  191. CFI_ADJUST_CFA_OFFSET 8
  192. /*CFI_REL_OFFSET ss,0*/
  193. pushq %rax /* rsp */
  194. CFI_ADJUST_CFA_OFFSET 8
  195. CFI_REL_OFFSET rsp,0
  196. pushq $(1<<9) /* eflags - interrupts on */
  197. CFI_ADJUST_CFA_OFFSET 8
  198. /*CFI_REL_OFFSET rflags,0*/
  199. pushq $__KERNEL_CS /* cs */
  200. CFI_ADJUST_CFA_OFFSET 8
  201. /*CFI_REL_OFFSET cs,0*/
  202. pushq \child_rip /* rip */
  203. CFI_ADJUST_CFA_OFFSET 8
  204. CFI_REL_OFFSET rip,0
  205. pushq %rax /* orig rax */
  206. CFI_ADJUST_CFA_OFFSET 8
  207. .endm
  208. .macro UNFAKE_STACK_FRAME
  209. addq $8*6, %rsp
  210. CFI_ADJUST_CFA_OFFSET -(6*8)
  211. .endm
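/*
 * CFI_DEFAULT_STACK only annotates a full pt_regs save area for the
 * dwarf2 unwinder; it emits no instructions.
 */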
  212. .macro CFI_DEFAULT_STACK start=1
  213. .if \start
  214. CFI_STARTPROC simple
  215. CFI_SIGNAL_FRAME
  216. CFI_DEF_CFA rsp,SS+8
  217. .else
  218. CFI_DEF_CFA_OFFSET SS+8
  219. .endif
  220. CFI_REL_OFFSET r15,R15
  221. CFI_REL_OFFSET r14,R14
  222. CFI_REL_OFFSET r13,R13
  223. CFI_REL_OFFSET r12,R12
  224. CFI_REL_OFFSET rbp,RBP
  225. CFI_REL_OFFSET rbx,RBX
  226. CFI_REL_OFFSET r11,R11
  227. CFI_REL_OFFSET r10,R10
  228. CFI_REL_OFFSET r9,R9
  229. CFI_REL_OFFSET r8,R8
  230. CFI_REL_OFFSET rax,RAX
  231. CFI_REL_OFFSET rcx,RCX
  232. CFI_REL_OFFSET rdx,RDX
  233. CFI_REL_OFFSET rsi,RSI
  234. CFI_REL_OFFSET rdi,RDI
  235. CFI_REL_OFFSET rip,RIP
  236. /*CFI_REL_OFFSET cs,CS*/
  237. /*CFI_REL_OFFSET rflags,EFLAGS*/
  238. CFI_REL_OFFSET rsp,RSP
  239. /*CFI_REL_OFFSET ss,SS*/
  240. .endm
  241. /*
  242. * A newly forked process directly context switches into this.
  243. */
  244. /* rdi: prev */
  245. ENTRY(ret_from_fork)
  246. CFI_DEFAULT_STACK
  247. push kernel_eflags(%rip)
  248. CFI_ADJUST_CFA_OFFSET 8
  249. popf # reset kernel eflags
  250. CFI_ADJUST_CFA_OFFSET -8
  251. call schedule_tail
  252. GET_THREAD_INFO(%rcx)
  253. testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
  254. jnz rff_trace
  255. rff_action:
  256. RESTORE_REST
  257. testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
  258. je int_ret_from_sys_call
  259. testl $_TIF_IA32,TI_flags(%rcx)
  260. jnz int_ret_from_sys_call
  261. RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
  262. jmp ret_from_sys_call
  263. rff_trace:
  264. movq %rsp,%rdi
  265. call syscall_trace_leave
  266. GET_THREAD_INFO(%rcx)
  267. jmp rff_action
  268. CFI_ENDPROC
  269. END(ret_from_fork)
  270. /*
  271. * System call entry. Up to 6 arguments in registers are supported.
  272. *
  273. * SYSCALL does not save anything on the stack and does not change the
  274. * stack pointer.
  275. */
  276. /*
  277. * Register setup:
  278. * rax system call number
  279. * rdi arg0
  280. * rcx return address for syscall/sysret, C arg3
  281. * rsi arg1
  282. * rdx arg2
  283. * r10 arg3 (--> moved to rcx for C)
  284. * r8 arg4
  285. * r9 arg5
  286. * r11 eflags for syscall/sysret, temporary for C
  287. * r12-r15,rbp,rbx saved by C code, not touched.
  288. *
  289. * Interrupts are off on entry.
  290. * Only called from user space.
  291. *
  292. * XXX if we had a free scratch register we could save RSP into the stack frame
  293. * and report it properly in ps. Unfortunately we don't have one.
  294. *
  295. * When the user can change the frames, always force IRET. That is because
  296. * IRET deals with non-canonical addresses better. SYSRET has trouble
  297. * with them due to bugs in both AMD and Intel CPUs.
  298. */
  299. ENTRY(system_call)
  300. CFI_STARTPROC simple
  301. CFI_SIGNAL_FRAME
  302. CFI_DEF_CFA rsp,PDA_STACKOFFSET
  303. CFI_REGISTER rip,rcx
  304. /*CFI_REGISTER rflags,r11*/
  305. SWAPGS_UNSAFE_STACK
  306. /*
  307. * A hypervisor implementation might want to use a label
  308. * after the swapgs, so that it can do the swapgs
  309. * for the guest and jump here on syscall.
  310. */
  311. ENTRY(system_call_after_swapgs)
  312. movq %rsp,%gs:pda_oldrsp
  313. movq %gs:pda_kernelstack,%rsp
  314. /*
  315. * No need to follow this irqs off/on section - it's straight
  316. * and short:
  317. */
  318. ENABLE_INTERRUPTS(CLBR_NONE)
  319. SAVE_ARGS 8,1
  320. movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
  321. movq %rcx,RIP-ARGOFFSET(%rsp)
  322. CFI_REL_OFFSET rip,RIP-ARGOFFSET
  323. GET_THREAD_INFO(%rcx)
  324. testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
  325. jnz tracesys
  326. system_call_fastpath:
  327. cmpq $__NR_syscall_max,%rax
  328. ja badsys
  329. movq %r10,%rcx
  330. call *sys_call_table(,%rax,8) # XXX: rip relative
  331. movq %rax,RAX-ARGOFFSET(%rsp)
  332. /*
  333. * Syscall return path ending with SYSRET (fast path)
  334. * Has incomplete stack frame and undefined top of stack.
  335. */
  336. ret_from_sys_call:
  337. movl $_TIF_ALLWORK_MASK,%edi
  338. /* edi: flagmask */
  339. sysret_check:
  340. LOCKDEP_SYS_EXIT
  341. GET_THREAD_INFO(%rcx)
  342. DISABLE_INTERRUPTS(CLBR_NONE)
  343. TRACE_IRQS_OFF
  344. movl TI_flags(%rcx),%edx
  345. andl %edi,%edx
  346. jnz sysret_careful
  347. CFI_REMEMBER_STATE
  348. /*
  349. * sysretq will re-enable interrupts:
  350. */
  351. TRACE_IRQS_ON
  352. movq RIP-ARGOFFSET(%rsp),%rcx
  353. CFI_REGISTER rip,rcx
  354. RESTORE_ARGS 0,-ARG_SKIP,1
  355. /*CFI_REGISTER rflags,r11*/
  356. movq %gs:pda_oldrsp, %rsp
  357. USERGS_SYSRET64
  358. CFI_RESTORE_STATE
  359. /* Handle reschedules */
  360. /* edx: work, edi: workmask */
  361. sysret_careful:
  362. bt $TIF_NEED_RESCHED,%edx
  363. jnc sysret_signal
  364. TRACE_IRQS_ON
  365. ENABLE_INTERRUPTS(CLBR_NONE)
  366. pushq %rdi
  367. CFI_ADJUST_CFA_OFFSET 8
  368. call schedule
  369. popq %rdi
  370. CFI_ADJUST_CFA_OFFSET -8
  371. jmp sysret_check
  372. /* Handle a signal */
  373. sysret_signal:
  374. TRACE_IRQS_ON
  375. ENABLE_INTERRUPTS(CLBR_NONE)
  376. #ifdef CONFIG_AUDITSYSCALL
  377. bt $TIF_SYSCALL_AUDIT,%edx
  378. jc sysret_audit
  379. #endif
  380. /* edx: work flags (arg3) */
  381. leaq do_notify_resume(%rip),%rax
  382. leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
  383. xorl %esi,%esi # oldset -> arg2
  384. call ptregscall_common
  385. movl $_TIF_WORK_MASK,%edi
  386. /* Use IRET because the user could have changed the frame. This
  387. works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
  388. DISABLE_INTERRUPTS(CLBR_NONE)
  389. TRACE_IRQS_OFF
  390. jmp int_with_check
  391. badsys:
  392. movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
  393. jmp ret_from_sys_call
  394. #ifdef CONFIG_AUDITSYSCALL
  395. /*
  396. * Fast path for syscall audit without full syscall trace.
  397. * We just call audit_syscall_entry() directly, and then
  398. * jump back to the normal fast path.
  399. */
  400. auditsys:
  401. movq %r10,%r9 /* 6th arg: 4th syscall arg */
  402. movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
  403. movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
  404. movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
  405. movq %rax,%rsi /* 2nd arg: syscall number */
  406. movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
  407. call audit_syscall_entry
  408. LOAD_ARGS 0 /* reload call-clobbered registers */
  409. jmp system_call_fastpath
  410. /*
  411. * Return fast path for syscall audit. Call audit_syscall_exit()
  412. * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
  413. * masked off.
  414. */
  415. sysret_audit:
  416. movq %rax,%rsi /* second arg, syscall return value */
  417. cmpq $0,%rax /* is it < 0? */
  418. setl %al /* 1 if so, 0 if not */
  419. movzbl %al,%edi /* zero-extend that into %edi */
  420. inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
  421. call audit_syscall_exit
  422. movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
  423. jmp sysret_check
  424. #endif /* CONFIG_AUDITSYSCALL */
  425. /* Do syscall tracing */
  426. tracesys:
  427. #ifdef CONFIG_AUDITSYSCALL
  428. testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
  429. jz auditsys
  430. #endif
  431. SAVE_REST
  432. movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
  433. FIXUP_TOP_OF_STACK %rdi
  434. movq %rsp,%rdi
  435. call syscall_trace_enter
  436. /*
  437. * Reload arg registers from stack in case ptrace changed them.
  438. * We don't reload %rax because syscall_trace_enter() returned
  439. * the value it wants us to use in the table lookup.
  440. */
  441. LOAD_ARGS ARGOFFSET, 1
  442. RESTORE_REST
  443. cmpq $__NR_syscall_max,%rax
  444. ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
  445. movq %r10,%rcx /* fixup for C */
  446. call *sys_call_table(,%rax,8)
  447. movq %rax,RAX-ARGOFFSET(%rsp)
  448. /* Use IRET because the user could have changed the frame */
  449. /*
  450. * Syscall return path ending with IRET.
  451. * Has correct top of stack, but partial stack frame.
  452. */
  453. .globl int_ret_from_sys_call
  454. .globl int_with_check
  455. int_ret_from_sys_call:
  456. DISABLE_INTERRUPTS(CLBR_NONE)
  457. TRACE_IRQS_OFF
  458. testl $3,CS-ARGOFFSET(%rsp)
  459. je retint_restore_args
  460. movl $_TIF_ALLWORK_MASK,%edi
  461. /* edi: mask to check */
  462. int_with_check:
  463. LOCKDEP_SYS_EXIT_IRQ
  464. GET_THREAD_INFO(%rcx)
  465. movl TI_flags(%rcx),%edx
  466. andl %edi,%edx
  467. jnz int_careful
  468. andl $~TS_COMPAT,TI_status(%rcx)
  469. jmp retint_swapgs
  470. /* Either reschedule or signal or syscall exit tracking needed. */
  471. /* First do a reschedule test. */
  472. /* edx: work, edi: workmask */
  473. int_careful:
  474. bt $TIF_NEED_RESCHED,%edx
  475. jnc int_very_careful
  476. TRACE_IRQS_ON
  477. ENABLE_INTERRUPTS(CLBR_NONE)
  478. pushq %rdi
  479. CFI_ADJUST_CFA_OFFSET 8
  480. call schedule
  481. popq %rdi
  482. CFI_ADJUST_CFA_OFFSET -8
  483. DISABLE_INTERRUPTS(CLBR_NONE)
  484. TRACE_IRQS_OFF
  485. jmp int_with_check
  486. /* handle signals and tracing -- both require a full stack frame */
  487. int_very_careful:
  488. TRACE_IRQS_ON
  489. ENABLE_INTERRUPTS(CLBR_NONE)
  490. SAVE_REST
  491. /* Check for syscall exit trace */
  492. testl $_TIF_WORK_SYSCALL_EXIT,%edx
  493. jz int_signal
  494. pushq %rdi
  495. CFI_ADJUST_CFA_OFFSET 8
  496. leaq 8(%rsp),%rdi # &ptregs -> arg1
  497. call syscall_trace_leave
  498. popq %rdi
  499. CFI_ADJUST_CFA_OFFSET -8
  500. andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
  501. jmp int_restore_rest
  502. int_signal:
  503. testl $_TIF_DO_NOTIFY_MASK,%edx
  504. jz 1f
  505. movq %rsp,%rdi # &ptregs -> arg1
  506. xorl %esi,%esi # oldset -> arg2
  507. call do_notify_resume
  508. 1: movl $_TIF_WORK_MASK,%edi
  509. int_restore_rest:
  510. RESTORE_REST
  511. DISABLE_INTERRUPTS(CLBR_NONE)
  512. TRACE_IRQS_OFF
  513. jmp int_with_check
  514. CFI_ENDPROC
  515. END(system_call)
  516. /*
  517. * Certain special system calls need to save a complete, full stack frame.
  518. */
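/*
 * PTREGSCALL loads the handler address into %rax, points the requested
 * argument register at the pt_regs area on the stack, and jumps to
 * ptregscall_common, which builds the full frame around the call.
 */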
  519. .macro PTREGSCALL label,func,arg
  520. .globl \label
  521. \label:
  522. leaq \func(%rip),%rax
  523. leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
  524. jmp ptregscall_common
  525. END(\label)
  526. .endm
  527. CFI_STARTPROC
  528. PTREGSCALL stub_clone, sys_clone, %r8
  529. PTREGSCALL stub_fork, sys_fork, %rdi
  530. PTREGSCALL stub_vfork, sys_vfork, %rdi
  531. PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
  532. PTREGSCALL stub_iopl, sys_iopl, %rsi
  533. ENTRY(ptregscall_common)
  534. popq %r11
  535. CFI_ADJUST_CFA_OFFSET -8
  536. CFI_REGISTER rip, r11
  537. SAVE_REST
  538. movq %r11, %r15
  539. CFI_REGISTER rip, r15
  540. FIXUP_TOP_OF_STACK %r11
  541. call *%rax
  542. RESTORE_TOP_OF_STACK %r11
  543. movq %r15, %r11
  544. CFI_REGISTER rip, r11
  545. RESTORE_REST
  546. pushq %r11
  547. CFI_ADJUST_CFA_OFFSET 8
  548. CFI_REL_OFFSET rip, 0
  549. ret
  550. CFI_ENDPROC
  551. END(ptregscall_common)
  552. ENTRY(stub_execve)
  553. CFI_STARTPROC
  554. popq %r11
  555. CFI_ADJUST_CFA_OFFSET -8
  556. CFI_REGISTER rip, r11
  557. SAVE_REST
  558. FIXUP_TOP_OF_STACK %r11
  559. movq %rsp, %rcx
  560. call sys_execve
  561. RESTORE_TOP_OF_STACK %r11
  562. movq %rax,RAX(%rsp)
  563. RESTORE_REST
  564. jmp int_ret_from_sys_call
  565. CFI_ENDPROC
  566. END(stub_execve)
  567. /*
  568. * sigreturn is special because it needs to restore all registers on return.
  569. * This cannot be done with SYSRET, so use the IRET return path instead.
  570. */
  571. ENTRY(stub_rt_sigreturn)
  572. CFI_STARTPROC
  573. addq $8, %rsp
  574. CFI_ADJUST_CFA_OFFSET -8
  575. SAVE_REST
  576. movq %rsp,%rdi
  577. FIXUP_TOP_OF_STACK %r11
  578. call sys_rt_sigreturn
  579. movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
  580. RESTORE_REST
  581. jmp int_ret_from_sys_call
  582. CFI_ENDPROC
  583. END(stub_rt_sigreturn)
  584. /*
  585. * initial frame state for interrupts and exceptions
  586. */
  587. .macro _frame ref
  588. CFI_STARTPROC simple
  589. CFI_SIGNAL_FRAME
  590. CFI_DEF_CFA rsp,SS+8-\ref
  591. /*CFI_REL_OFFSET ss,SS-\ref*/
  592. CFI_REL_OFFSET rsp,RSP-\ref
  593. /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
  594. /*CFI_REL_OFFSET cs,CS-\ref*/
  595. CFI_REL_OFFSET rip,RIP-\ref
  596. .endm
  597. /* initial frame state for interrupts (and exceptions without error code) */
  598. #define INTR_FRAME _frame RIP
  599. /* initial frame state for exceptions with error code (and interrupts with
  600. vector already pushed) */
  601. #define XCPT_FRAME _frame ORIG_RAX
  602. /*
  603. * Interrupt entry/exit.
  604. *
  605. * Interrupt entry points save only callee-clobbered registers in the fast path.
  606. *
  607. * Entry runs with interrupts off.
  608. */
  609. /* 0(%rsp): interrupt number */
  610. .macro interrupt func
  611. cld
  612. SAVE_ARGS
  613. leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
  614. pushq %rbp
  615. /*
  616. * Save rbp twice: once for marking the stack frame, as usual, and once
  617. * more to fill pt_regs properly. This is because bx comes right
  618. * before the last saved register in that structure, and not bp. If the
  619. * base pointer were in the place bx is today, this would not be needed.
  620. */
  621. movq %rbp, -8(%rsp)
  622. CFI_ADJUST_CFA_OFFSET 8
  623. CFI_REL_OFFSET rbp, 0
  624. movq %rsp,%rbp
  625. CFI_DEF_CFA_REGISTER rbp
  626. testl $3,CS(%rdi)
  627. je 1f
  628. SWAPGS
  629. /* irqcount is used to check if a CPU is already on an interrupt
  630. stack or not. While this is essentially redundant with preempt_count
  631. it is a little cheaper to use a separate counter in the PDA
  632. (short of moving irq_enter into assembly, which would be too
  633. much work) */
  634. 1: incl %gs:pda_irqcount
  635. cmoveq %gs:pda_irqstackptr,%rsp
  636. push %rbp # backlink for old unwinder
  637. /*
  638. * We entered an interrupt context - irqs are off:
  639. */
  640. TRACE_IRQS_OFF
  641. call \func
  642. .endm
  643. ENTRY(common_interrupt)
  644. XCPT_FRAME
  645. interrupt do_IRQ
  646. /* 0(%rsp): oldrsp-ARGOFFSET */
  647. ret_from_intr:
  648. DISABLE_INTERRUPTS(CLBR_NONE)
  649. TRACE_IRQS_OFF
  650. decl %gs:pda_irqcount
  651. leaveq
  652. CFI_DEF_CFA_REGISTER rsp
  653. CFI_ADJUST_CFA_OFFSET -8
  654. exit_intr:
  655. GET_THREAD_INFO(%rcx)
  656. testl $3,CS-ARGOFFSET(%rsp)
  657. je retint_kernel
  658. /* Interrupt came from user space */
  659. /*
  660. * Has a correct top of stack, but a partial stack frame
  661. * %rcx: thread info. Interrupts off.
  662. */
  663. retint_with_reschedule:
  664. movl $_TIF_WORK_MASK,%edi
  665. retint_check:
  666. LOCKDEP_SYS_EXIT_IRQ
  667. movl TI_flags(%rcx),%edx
  668. andl %edi,%edx
  669. CFI_REMEMBER_STATE
  670. jnz retint_careful
  671. retint_swapgs: /* return to user-space */
  672. /*
  673. * The iretq could re-enable interrupts:
  674. */
  675. DISABLE_INTERRUPTS(CLBR_ANY)
  676. TRACE_IRQS_IRETQ
  677. SWAPGS
  678. jmp restore_args
  679. retint_restore_args: /* return to kernel space */
  680. DISABLE_INTERRUPTS(CLBR_ANY)
  681. /*
  682. * The iretq could re-enable interrupts:
  683. */
  684. TRACE_IRQS_IRETQ
  685. restore_args:
  686. RESTORE_ARGS 0,8,0
  687. irq_return:
  688. INTERRUPT_RETURN
  689. .section __ex_table, "a"
  690. .quad irq_return, bad_iret
  691. .previous
  692. #ifdef CONFIG_PARAVIRT
  693. ENTRY(native_iret)
  694. iretq
  695. .section __ex_table,"a"
  696. .quad native_iret, bad_iret
  697. .previous
  698. #endif
  699. .section .fixup,"ax"
  700. bad_iret:
  701. /*
  702. * The iret traps when the %cs or %ss being restored is bogus.
  703. * We've lost the original trap vector and error code.
  704. * #GPF is the most likely one to get for an invalid selector.
  705. * So pretend we completed the iret and took the #GPF in user mode.
  706. *
  707. * We are now running with the kernel GS after exception recovery.
  708. * But error_entry expects us to have user GS to match the user %cs,
  709. * so swap back.
  710. */
  711. pushq $0
  712. SWAPGS
  713. jmp general_protection
  714. .previous
  715. /* edi: workmask, edx: work */
  716. retint_careful:
  717. CFI_RESTORE_STATE
  718. bt $TIF_NEED_RESCHED,%edx
  719. jnc retint_signal
  720. TRACE_IRQS_ON
  721. ENABLE_INTERRUPTS(CLBR_NONE)
  722. pushq %rdi
  723. CFI_ADJUST_CFA_OFFSET 8
  724. call schedule
  725. popq %rdi
  726. CFI_ADJUST_CFA_OFFSET -8
  727. GET_THREAD_INFO(%rcx)
  728. DISABLE_INTERRUPTS(CLBR_NONE)
  729. TRACE_IRQS_OFF
  730. jmp retint_check
  731. retint_signal:
  732. testl $_TIF_DO_NOTIFY_MASK,%edx
  733. jz retint_swapgs
  734. TRACE_IRQS_ON
  735. ENABLE_INTERRUPTS(CLBR_NONE)
  736. SAVE_REST
  737. movq $-1,ORIG_RAX(%rsp)
  738. xorl %esi,%esi # oldset
  739. movq %rsp,%rdi # &pt_regs
  740. call do_notify_resume
  741. RESTORE_REST
  742. DISABLE_INTERRUPTS(CLBR_NONE)
  743. TRACE_IRQS_OFF
  744. GET_THREAD_INFO(%rcx)
  745. jmp retint_with_reschedule
  746. #ifdef CONFIG_PREEMPT
  747. /* Returning to kernel space. Check if we need preemption */
  748. /* rcx: threadinfo. interrupts off. */
  749. ENTRY(retint_kernel)
  750. cmpl $0,TI_preempt_count(%rcx)
  751. jnz retint_restore_args
  752. bt $TIF_NEED_RESCHED,TI_flags(%rcx)
  753. jnc retint_restore_args
  754. bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
  755. jnc retint_restore_args
  756. call preempt_schedule_irq
  757. jmp exit_intr
  758. #endif
  759. CFI_ENDPROC
  760. END(common_interrupt)
  761. /*
  762. * APIC interrupts.
  763. */
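/*
 * Each APIC stub pushes the negated vector number into the orig_rax slot
 * and then funnels into the common interrupt/ret_from_intr path.
 */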
  764. .macro apicinterrupt num,func
  765. INTR_FRAME
  766. pushq $~(\num)
  767. CFI_ADJUST_CFA_OFFSET 8
  768. interrupt \func
  769. jmp ret_from_intr
  770. CFI_ENDPROC
  771. .endm
  772. ENTRY(thermal_interrupt)
  773. apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
  774. END(thermal_interrupt)
  775. ENTRY(threshold_interrupt)
  776. apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
  777. END(threshold_interrupt)
  778. #ifdef CONFIG_SMP
  779. ENTRY(reschedule_interrupt)
  780. apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
  781. END(reschedule_interrupt)
  782. .macro INVALIDATE_ENTRY num
  783. ENTRY(invalidate_interrupt\num)
  784. apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
  785. END(invalidate_interrupt\num)
  786. .endm
  787. INVALIDATE_ENTRY 0
  788. INVALIDATE_ENTRY 1
  789. INVALIDATE_ENTRY 2
  790. INVALIDATE_ENTRY 3
  791. INVALIDATE_ENTRY 4
  792. INVALIDATE_ENTRY 5
  793. INVALIDATE_ENTRY 6
  794. INVALIDATE_ENTRY 7
  795. ENTRY(call_function_interrupt)
  796. apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
  797. END(call_function_interrupt)
  798. ENTRY(call_function_single_interrupt)
  799. apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
  800. END(call_function_single_interrupt)
  801. ENTRY(irq_move_cleanup_interrupt)
  802. apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
  803. END(irq_move_cleanup_interrupt)
  804. #endif
  805. ENTRY(apic_timer_interrupt)
  806. apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
  807. END(apic_timer_interrupt)
  808. ENTRY(uv_bau_message_intr1)
  809. apicinterrupt 220,uv_bau_message_interrupt
  810. END(uv_bau_message_intr1)
  811. ENTRY(error_interrupt)
  812. apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
  813. END(error_interrupt)
  814. ENTRY(spurious_interrupt)
  815. apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
  816. END(spurious_interrupt)
  817. /*
  818. * Exception entry points.
  819. */
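/*
 * zeroentry is for exceptions that push no error code (a zero is pushed
 * to keep the frame layout uniform); errorentry is for exceptions where
 * the CPU has already pushed one. Both stash the handler address in %rax
 * and branch to the common error_entry code.
 */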
  820. .macro zeroentry sym
  821. INTR_FRAME
  822. PARAVIRT_ADJUST_EXCEPTION_FRAME
  823. pushq $0 /* push error code/oldrax */
  824. CFI_ADJUST_CFA_OFFSET 8
  825. pushq %rax /* push real oldrax to the rdi slot */
  826. CFI_ADJUST_CFA_OFFSET 8
  827. CFI_REL_OFFSET rax,0
  828. leaq \sym(%rip),%rax
  829. jmp error_entry
  830. CFI_ENDPROC
  831. .endm
  832. .macro errorentry sym
  833. XCPT_FRAME
  834. PARAVIRT_ADJUST_EXCEPTION_FRAME
  835. pushq %rax
  836. CFI_ADJUST_CFA_OFFSET 8
  837. CFI_REL_OFFSET rax,0
  838. leaq \sym(%rip),%rax
  839. jmp error_entry
  840. CFI_ENDPROC
  841. .endm
  842. /* error code is on the stack already */
  843. /* handle NMI-like exceptions that can happen anywhere */
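/*
 * paranoidentry saves all registers and reads MSR_GS_BASE to decide
 * whether SWAPGS already happened: a negative (kernel) base means GS is
 * already the kernel's, so %ebx stays 1 ("no swapgs needed on exit");
 * otherwise we SWAPGS and clear %ebx. paranoidexit consumes that flag.
 */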
  844. .macro paranoidentry sym, ist=0, irqtrace=1
  845. SAVE_ALL
  846. cld
  847. movl $1,%ebx
  848. movl $MSR_GS_BASE,%ecx
  849. rdmsr
  850. testl %edx,%edx
  851. js 1f
  852. SWAPGS
  853. xorl %ebx,%ebx
  854. 1:
  855. .if \ist
  856. movq %gs:pda_data_offset, %rbp
  857. .endif
  858. .if \irqtrace
  859. TRACE_IRQS_OFF
  860. .endif
  861. movq %rsp,%rdi
  862. movq ORIG_RAX(%rsp),%rsi
  863. movq $-1,ORIG_RAX(%rsp)
  864. .if \ist
  865. subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  866. .endif
  867. call \sym
  868. .if \ist
  869. addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  870. .endif
  871. DISABLE_INTERRUPTS(CLBR_NONE)
  872. .if \irqtrace
  873. TRACE_IRQS_OFF
  874. .endif
  875. .endm
  876. /*
  877. * "Paranoid" exit path from exception stack.
  878. * Paranoid because this is used by NMIs and cannot take
  879. * any kernel state for granted.
  880. * We don't do kernel preemption checks here, because only
  881. * NMI should be common and it does not enable IRQs and
  882. * cannot get reschedule ticks.
  883. *
  884. * "trace" is 0 for the NMI handler only, because irq-tracing
  885. * is fundamentally NMI-unsafe. (we cannot change the soft and
  886. * hard flags at once, atomically)
  887. */
  888. .macro paranoidexit trace=1
  889. /* ebx: no swapgs flag */
  890. paranoid_exit\trace:
  891. testl %ebx,%ebx /* swapgs needed? */
  892. jnz paranoid_restore\trace
  893. testl $3,CS(%rsp)
  894. jnz paranoid_userspace\trace
  895. paranoid_swapgs\trace:
  896. .if \trace
  897. TRACE_IRQS_IRETQ 0
  898. .endif
  899. SWAPGS_UNSAFE_STACK
  900. paranoid_restore\trace:
  901. RESTORE_ALL 8
  902. jmp irq_return
  903. paranoid_userspace\trace:
  904. GET_THREAD_INFO(%rcx)
  905. movl TI_flags(%rcx),%ebx
  906. andl $_TIF_WORK_MASK,%ebx
  907. jz paranoid_swapgs\trace
  908. movq %rsp,%rdi /* &pt_regs */
  909. call sync_regs
  910. movq %rax,%rsp /* switch stack for scheduling */
  911. testl $_TIF_NEED_RESCHED,%ebx
  912. jnz paranoid_schedule\trace
  913. movl %ebx,%edx /* arg3: thread flags */
  914. .if \trace
  915. TRACE_IRQS_ON
  916. .endif
  917. ENABLE_INTERRUPTS(CLBR_NONE)
  918. xorl %esi,%esi /* arg2: oldset */
  919. movq %rsp,%rdi /* arg1: &pt_regs */
  920. call do_notify_resume
  921. DISABLE_INTERRUPTS(CLBR_NONE)
  922. .if \trace
  923. TRACE_IRQS_OFF
  924. .endif
  925. jmp paranoid_userspace\trace
  926. paranoid_schedule\trace:
  927. .if \trace
  928. TRACE_IRQS_ON
  929. .endif
  930. ENABLE_INTERRUPTS(CLBR_ANY)
  931. call schedule
  932. DISABLE_INTERRUPTS(CLBR_ANY)
  933. .if \trace
  934. TRACE_IRQS_OFF
  935. .endif
  936. jmp paranoid_userspace\trace
  937. CFI_ENDPROC
  938. .endm
  939. /*
  940. * Exception entry point. This expects an error code/orig_rax on the stack
  941. * and the exception handler in %rax.
  942. */
  943. KPROBE_ENTRY(error_entry)
  944. _frame RDI
  945. CFI_REL_OFFSET rax,0
  946. /* rdi slot contains rax, oldrax contains error code */
  947. cld
  948. subq $14*8,%rsp
  949. CFI_ADJUST_CFA_OFFSET (14*8)
  950. movq %rsi,13*8(%rsp)
  951. CFI_REL_OFFSET rsi,RSI
  952. movq 14*8(%rsp),%rsi /* load rax from rdi slot */
  953. CFI_REGISTER rax,rsi
  954. movq %rdx,12*8(%rsp)
  955. CFI_REL_OFFSET rdx,RDX
  956. movq %rcx,11*8(%rsp)
  957. CFI_REL_OFFSET rcx,RCX
  958. movq %rsi,10*8(%rsp) /* store rax */
  959. CFI_REL_OFFSET rax,RAX
  960. movq %r8, 9*8(%rsp)
  961. CFI_REL_OFFSET r8,R8
  962. movq %r9, 8*8(%rsp)
  963. CFI_REL_OFFSET r9,R9
  964. movq %r10,7*8(%rsp)
  965. CFI_REL_OFFSET r10,R10
  966. movq %r11,6*8(%rsp)
  967. CFI_REL_OFFSET r11,R11
  968. movq %rbx,5*8(%rsp)
  969. CFI_REL_OFFSET rbx,RBX
  970. movq %rbp,4*8(%rsp)
  971. CFI_REL_OFFSET rbp,RBP
  972. movq %r12,3*8(%rsp)
  973. CFI_REL_OFFSET r12,R12
  974. movq %r13,2*8(%rsp)
  975. CFI_REL_OFFSET r13,R13
  976. movq %r14,1*8(%rsp)
  977. CFI_REL_OFFSET r14,R14
  978. movq %r15,(%rsp)
  979. CFI_REL_OFFSET r15,R15
  980. xorl %ebx,%ebx
  981. testl $3,CS(%rsp)
  982. je error_kernelspace
  983. error_swapgs:
  984. SWAPGS
  985. error_sti:
  986. TRACE_IRQS_OFF
  987. movq %rdi,RDI(%rsp)
  988. CFI_REL_OFFSET rdi,RDI
  989. movq %rsp,%rdi
  990. movq ORIG_RAX(%rsp),%rsi /* get error code */
  991. movq $-1,ORIG_RAX(%rsp)
  992. call *%rax
  993. /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
  994. error_exit:
  995. movl %ebx,%eax
  996. RESTORE_REST
  997. DISABLE_INTERRUPTS(CLBR_NONE)
  998. TRACE_IRQS_OFF
  999. GET_THREAD_INFO(%rcx)
  1000. testl %eax,%eax
  1001. jne retint_kernel
  1002. LOCKDEP_SYS_EXIT_IRQ
  1003. movl TI_flags(%rcx),%edx
  1004. movl $_TIF_WORK_MASK,%edi
  1005. andl %edi,%edx
  1006. jnz retint_careful
  1007. jmp retint_swapgs
  1008. CFI_ENDPROC
  1009. error_kernelspace:
  1010. incl %ebx
  1011. /* There are two places in the kernel that can potentially fault with
  1012. usergs. Handle them here. The exception handlers after
  1013. iret run with kernel gs again, so don't set the user space flag.
  1014. B-stepping K8s sometimes report a truncated RIP for IRET
  1015. exceptions returning to compat mode. Check for these here too. */
  1016. leaq irq_return(%rip),%rcx
  1017. cmpq %rcx,RIP(%rsp)
  1018. je error_swapgs
  1019. movl %ecx,%ecx /* zero extend */
  1020. cmpq %rcx,RIP(%rsp)
  1021. je error_swapgs
  1022. cmpq $gs_change,RIP(%rsp)
  1023. je error_swapgs
  1024. jmp error_sti
  1025. KPROBE_END(error_entry)
  1026. /* Reload gs selector with exception handling */
  1027. /* edi: new selector */
  1028. ENTRY(native_load_gs_index)
  1029. CFI_STARTPROC
  1030. pushf
  1031. CFI_ADJUST_CFA_OFFSET 8
  1032. DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
  1033. SWAPGS
  1034. gs_change:
  1035. movl %edi,%gs
  1036. 2: mfence /* workaround */
  1037. SWAPGS
  1038. popf
  1039. CFI_ADJUST_CFA_OFFSET -8
  1040. ret
  1041. CFI_ENDPROC
  1042. ENDPROC(native_load_gs_index)
  1043. .section __ex_table,"a"
  1044. .align 8
  1045. .quad gs_change,bad_gs
  1046. .previous
  1047. .section .fixup,"ax"
  1048. /* running with kernelgs */
  1049. bad_gs:
  1050. SWAPGS /* switch back to user gs */
  1051. xorl %eax,%eax
  1052. movl %eax,%gs
  1053. jmp 2b
  1054. .previous
  1055. /*
  1056. * Create a kernel thread.
  1057. *
  1058. * C extern interface:
  1059. * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
  1060. *
  1061. * asm input arguments:
  1062. * rdi: fn, rsi: arg, rdx: flags
  1063. */
  1064. ENTRY(kernel_thread)
  1065. CFI_STARTPROC
  1066. FAKE_STACK_FRAME $child_rip
  1067. SAVE_ALL
  1068. # rdi: flags, rsi: usp, rdx: will be &pt_regs
  1069. movq %rdx,%rdi
  1070. orq kernel_thread_flags(%rip),%rdi
  1071. movq $-1, %rsi
  1072. movq %rsp, %rdx
  1073. xorl %r8d,%r8d
  1074. xorl %r9d,%r9d
  1075. # clone now
  1076. call do_fork
  1077. movq %rax,RAX(%rsp)
  1078. xorl %edi,%edi
  1079. /*
  1080. * It isn't worth checking for a reschedule here,
  1081. * so internally to the x86_64 port you can rely on kernel_thread()
  1082. * not rescheduling the child before returning; this avoids the need
  1083. * for hacks, for example to fork off the per-CPU idle tasks.
  1084. * [Hopefully no generic code relies on the reschedule -AK]
  1085. */
  1086. RESTORE_ALL
  1087. UNFAKE_STACK_FRAME
  1088. ret
  1089. CFI_ENDPROC
  1090. ENDPROC(kernel_thread)
  1091. child_rip:
  1092. pushq $0 # fake return address
  1093. CFI_STARTPROC
  1094. /*
  1095. * Here we are in the child and the registers are set as they were
  1096. * at kernel_thread() invocation in the parent.
  1097. */
  1098. movq %rdi, %rax
  1099. movq %rsi, %rdi
  1100. call *%rax
  1101. # exit
  1102. mov %eax, %edi
  1103. call do_exit
  1104. CFI_ENDPROC
  1105. ENDPROC(child_rip)
  1106. /*
  1107. * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
  1108. *
  1109. * C extern interface:
  1110. * extern long execve(char *name, char **argv, char **envp)
  1111. *
  1112. * asm input arguments:
  1113. * rdi: name, rsi: argv, rdx: envp
  1114. *
  1115. * We want to fall back into:
  1116. * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
  1117. *
  1118. * do_sys_execve asm fallback arguments:
  1119. * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
  1120. */
  1121. ENTRY(kernel_execve)
  1122. CFI_STARTPROC
  1123. FAKE_STACK_FRAME $0
  1124. SAVE_ALL
  1125. movq %rsp,%rcx
  1126. call sys_execve
  1127. movq %rax, RAX(%rsp)
  1128. RESTORE_REST
  1129. testq %rax,%rax
  1130. je int_ret_from_sys_call
  1131. RESTORE_ARGS
  1132. UNFAKE_STACK_FRAME
  1133. ret
  1134. CFI_ENDPROC
  1135. ENDPROC(kernel_execve)
  1136. KPROBE_ENTRY(page_fault)
  1137. errorentry do_page_fault
  1138. KPROBE_END(page_fault)
  1139. ENTRY(coprocessor_error)
  1140. zeroentry do_coprocessor_error
  1141. END(coprocessor_error)
  1142. ENTRY(simd_coprocessor_error)
  1143. zeroentry do_simd_coprocessor_error
  1144. END(simd_coprocessor_error)
  1145. ENTRY(device_not_available)
  1146. zeroentry do_device_not_available
  1147. END(device_not_available)
  1148. /* runs on exception stack */
  1149. KPROBE_ENTRY(debug)
  1150. INTR_FRAME
  1151. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1152. pushq $0
  1153. CFI_ADJUST_CFA_OFFSET 8
  1154. paranoidentry do_debug, DEBUG_STACK
  1155. paranoidexit
  1156. KPROBE_END(debug)
  1157. /* runs on exception stack */
  1158. KPROBE_ENTRY(nmi)
  1159. INTR_FRAME
  1160. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1161. pushq $-1
  1162. CFI_ADJUST_CFA_OFFSET 8
  1163. paranoidentry do_nmi, 0, 0
  1164. #ifdef CONFIG_TRACE_IRQFLAGS
  1165. paranoidexit 0
  1166. #else
  1167. jmp paranoid_exit1
  1168. CFI_ENDPROC
  1169. #endif
  1170. KPROBE_END(nmi)
  1171. KPROBE_ENTRY(int3)
  1172. INTR_FRAME
  1173. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1174. pushq $0
  1175. CFI_ADJUST_CFA_OFFSET 8
  1176. paranoidentry do_int3, DEBUG_STACK
  1177. jmp paranoid_exit1
  1178. CFI_ENDPROC
  1179. KPROBE_END(int3)
  1180. ENTRY(overflow)
  1181. zeroentry do_overflow
  1182. END(overflow)
  1183. ENTRY(bounds)
  1184. zeroentry do_bounds
  1185. END(bounds)
  1186. ENTRY(invalid_op)
  1187. zeroentry do_invalid_op
  1188. END(invalid_op)
  1189. ENTRY(coprocessor_segment_overrun)
  1190. zeroentry do_coprocessor_segment_overrun
  1191. END(coprocessor_segment_overrun)
  1192. /* runs on exception stack */
  1193. ENTRY(double_fault)
  1194. XCPT_FRAME
  1195. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1196. paranoidentry do_double_fault
  1197. jmp paranoid_exit1
  1198. CFI_ENDPROC
  1199. END(double_fault)
  1200. ENTRY(invalid_TSS)
  1201. errorentry do_invalid_TSS
  1202. END(invalid_TSS)
  1203. ENTRY(segment_not_present)
  1204. errorentry do_segment_not_present
  1205. END(segment_not_present)
  1206. /* runs on exception stack */
  1207. ENTRY(stack_segment)
  1208. XCPT_FRAME
  1209. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1210. paranoidentry do_stack_segment
  1211. jmp paranoid_exit1
  1212. CFI_ENDPROC
  1213. END(stack_segment)
  1214. KPROBE_ENTRY(general_protection)
  1215. errorentry do_general_protection
  1216. KPROBE_END(general_protection)
  1217. ENTRY(alignment_check)
  1218. errorentry do_alignment_check
  1219. END(alignment_check)
  1220. ENTRY(divide_error)
  1221. zeroentry do_divide_error
  1222. END(divide_error)
  1223. ENTRY(spurious_interrupt_bug)
  1224. zeroentry do_spurious_interrupt_bug
  1225. END(spurious_interrupt_bug)
  1226. #ifdef CONFIG_X86_MCE
  1227. /* runs on exception stack */
  1228. ENTRY(machine_check)
  1229. INTR_FRAME
  1230. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1231. pushq $0
  1232. CFI_ADJUST_CFA_OFFSET 8
  1233. paranoidentry do_machine_check
  1234. jmp paranoid_exit1
  1235. CFI_ENDPROC
  1236. END(machine_check)
  1237. #endif
  1238. /* Call softirq on interrupt stack. Interrupts are off. */
  1239. ENTRY(call_softirq)
  1240. CFI_STARTPROC
  1241. push %rbp
  1242. CFI_ADJUST_CFA_OFFSET 8
  1243. CFI_REL_OFFSET rbp,0
  1244. mov %rsp,%rbp
  1245. CFI_DEF_CFA_REGISTER rbp
  1246. incl %gs:pda_irqcount
  1247. cmove %gs:pda_irqstackptr,%rsp
  1248. push %rbp # backlink for old unwinder
  1249. call __do_softirq
  1250. leaveq
  1251. CFI_DEF_CFA_REGISTER rsp
  1252. CFI_ADJUST_CFA_OFFSET -8
  1253. decl %gs:pda_irqcount
  1254. ret
  1255. CFI_ENDPROC
  1256. ENDPROC(call_softirq)
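/*
 * ignore_sysret is wired up (via MSR_CSTAR, see syscall_init()) as the
 * 32-bit SYSCALL target on CPUs that do not support it; it simply fails
 * the call with -ENOSYS.
 */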
  1257. KPROBE_ENTRY(ignore_sysret)
  1258. CFI_STARTPROC
  1259. mov $-ENOSYS,%eax
  1260. sysret
  1261. CFI_ENDPROC
  1262. ENDPROC(ignore_sysret)
  1263. #ifdef CONFIG_XEN
  1264. ENTRY(xen_hypervisor_callback)
  1265. zeroentry xen_do_hypervisor_callback
  1266. END(xen_hypervisor_callback)
  1267. /*
  1268. # A note on the "critical region" in our callback handler.
  1269. # We want to avoid stacking callback handlers due to events occurring
  1270. # during handling of the last event. To do this, we keep events disabled
  1271. # until we've done all processing. HOWEVER, we must enable events before
  1272. # popping the stack frame (can't be done atomically) and so it would still
  1273. # be possible to get enough handler activations to overflow the stack.
  1274. # Although unlikely, bugs of that kind are hard to track down, so we'd
  1275. # like to avoid the possibility.
  1276. # So, on entry to the handler we detect whether we interrupted an
  1277. # existing activation in its critical region -- if so, we pop the current
  1278. # activation and restart the handler using the previous one.
  1279. */
  1280. ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
  1281. CFI_STARTPROC
  1282. /* Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
  1283. see the correct pointer to the pt_regs */
  1284. movq %rdi, %rsp # we don't return, adjust the stack frame
  1285. CFI_ENDPROC
  1286. CFI_DEFAULT_STACK
  1287. 11: incl %gs:pda_irqcount
  1288. movq %rsp,%rbp
  1289. CFI_DEF_CFA_REGISTER rbp
  1290. cmovzq %gs:pda_irqstackptr,%rsp
  1291. pushq %rbp # backlink for old unwinder
  1292. call xen_evtchn_do_upcall
  1293. popq %rsp
  1294. CFI_DEF_CFA_REGISTER rsp
  1295. decl %gs:pda_irqcount
  1296. jmp error_exit
  1297. CFI_ENDPROC
  1298. END(xen_do_hypervisor_callback)
  1299. /*
  1300. # Hypervisor uses this for application faults while it executes.
  1301. # We get here for two reasons:
  1302. # 1. Fault while reloading DS, ES, FS or GS
  1303. # 2. Fault while executing IRET
  1304. # Category 1 we do not need to fix up as Xen has already reloaded all segment
  1305. # registers that could be reloaded and zeroed the others.
  1306. # Category 2 we fix up by killing the current process. We cannot use the
  1307. # normal Linux return path in this case because if we use the IRET hypercall
  1308. # to pop the stack frame we end up in an infinite loop of failsafe callbacks.
  1309. # We distinguish between categories by comparing each saved segment register
  1310. # with its current contents: any discrepancy means we are in category 1.
  1311. */
  1312. ENTRY(xen_failsafe_callback)
  1313. framesz = (RIP-0x30) /* workaround buggy gas */
  1314. _frame framesz
  1315. CFI_REL_OFFSET rcx, 0
  1316. CFI_REL_OFFSET r11, 8
  1317. movw %ds,%cx
  1318. cmpw %cx,0x10(%rsp)
  1319. CFI_REMEMBER_STATE
  1320. jne 1f
  1321. movw %es,%cx
  1322. cmpw %cx,0x18(%rsp)
  1323. jne 1f
  1324. movw %fs,%cx
  1325. cmpw %cx,0x20(%rsp)
  1326. jne 1f
  1327. movw %gs,%cx
  1328. cmpw %cx,0x28(%rsp)
  1329. jne 1f
  1330. /* All segments match their saved values => Category 2 (Bad IRET). */
  1331. movq (%rsp),%rcx
  1332. CFI_RESTORE rcx
  1333. movq 8(%rsp),%r11
  1334. CFI_RESTORE r11
  1335. addq $0x30,%rsp
  1336. CFI_ADJUST_CFA_OFFSET -0x30
  1337. pushq $0
  1338. CFI_ADJUST_CFA_OFFSET 8
  1339. pushq %r11
  1340. CFI_ADJUST_CFA_OFFSET 8
  1341. pushq %rcx
  1342. CFI_ADJUST_CFA_OFFSET 8
  1343. jmp general_protection
  1344. CFI_RESTORE_STATE
  1345. 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
  1346. movq (%rsp),%rcx
  1347. CFI_RESTORE rcx
  1348. movq 8(%rsp),%r11
  1349. CFI_RESTORE r11
  1350. addq $0x30,%rsp
  1351. CFI_ADJUST_CFA_OFFSET -0x30
  1352. pushq $0
  1353. CFI_ADJUST_CFA_OFFSET 8
  1354. SAVE_ALL
  1355. jmp error_exit
  1356. CFI_ENDPROC
  1357. END(xen_failsafe_callback)
  1358. #endif /* CONFIG_XEN */