entry_64.S

  1. /*
  2. * linux/arch/x86_64/entry.S
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
  6. * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
  7. */
  8. /*
  9. * entry.S contains the system-call and fault low-level handling routines.
  10. *
  11. * NOTE: This code handles signal-recognition, which happens every time
  12. * after an interrupt and after each system call.
  13. *
  14. * Normal syscalls and interrupts don't save a full stack frame; this is
  15. * only done for syscall tracing, signals, or fork/exec et al.
  16. *
  17. * A note on terminology:
  18. * - top of stack: Architecture defined interrupt frame from SS to RIP
  19. * at the top of the kernel process stack.
  20. * - partial stack frame: partially saved registers up to R11.
  21. * - full stack frame: Like partial stack frame, but all registers saved.
  22. *
  23. * Some macro usage:
  24. * - CFI macros are used to generate dwarf2 unwind information for better
  25. * backtraces. They don't change any code.
  26. * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
  27. * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
  28. * There are unfortunately lots of special cases where some registers are
  29. * not touched. The macro is a big mess that should be cleaned up.
  30. * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
  31. * Gives a full stack frame.
  32. * - ENTRY/END - Define functions in the symbol table.
  33. * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  34. * frame that is otherwise undefined after a SYSCALL
  35. * - TRACE_IRQS_* - Trace hard interrupt state for lock debugging.
  36. * - errorentry/paranoidentry/zeroentry - Define exception entry points.
  37. */
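/*
 * Illustrative sketch (not part of this file): the CFI_* macros named above
 * typically expand to the assembler's dwarf2 .cfi_* directives when the
 * toolchain supports them. The fragment below, guarded out with #if 0, shows
 * the underlying directives on a tiny hand-written function; the symbol name
 * cfi_example is made up for illustration only. The directives only add
 * unwind information - they do not change the generated code.
 */
#if 0
	.text
	.globl	cfi_example
	.type	cfi_example, @function
cfi_example:
	.cfi_startproc
	pushq	%rbx			/* CFA is now 8 bytes further from %rsp */
	.cfi_adjust_cfa_offset 8
	.cfi_rel_offset rbx, 0		/* old %rbx is saved at 0(%rsp) */
	movq	%rdi, %rbx
	leaq	1(%rbx), %rax		/* dummy body: return arg0 + 1 */
	popq	%rbx
	.cfi_adjust_cfa_offset -8
	.cfi_restore rbx
	ret
	.cfi_endproc
	.size	cfi_example, .-cfi_example
#endif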
  38. #include <linux/linkage.h>
  39. #include <asm/segment.h>
  40. #include <asm/cache.h>
  41. #include <asm/errno.h>
  42. #include <asm/dwarf2.h>
  43. #include <asm/calling.h>
  44. #include <asm/asm-offsets.h>
  45. #include <asm/msr.h>
  46. #include <asm/unistd.h>
  47. #include <asm/thread_info.h>
  48. #include <asm/hw_irq.h>
  49. #include <asm/page.h>
  50. #include <asm/irqflags.h>
  51. #include <asm/paravirt.h>
  52. #include <asm/ftrace.h>
  53. /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
  54. #include <linux/elf-em.h>
  55. #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
  56. #define __AUDIT_ARCH_64BIT 0x80000000
  57. #define __AUDIT_ARCH_LE 0x40000000
  58. .code64
  59. #ifdef CONFIG_FTRACE
  60. #ifdef CONFIG_DYNAMIC_FTRACE
  61. ENTRY(mcount)
  62. subq $0x38, %rsp
  63. movq %rax, (%rsp)
  64. movq %rcx, 8(%rsp)
  65. movq %rdx, 16(%rsp)
  66. movq %rsi, 24(%rsp)
  67. movq %rdi, 32(%rsp)
  68. movq %r8, 40(%rsp)
  69. movq %r9, 48(%rsp)
  70. movq 0x38(%rsp), %rdi
  71. subq $MCOUNT_INSN_SIZE, %rdi
  72. .globl mcount_call
  73. mcount_call:
  74. call ftrace_stub
  75. movq 48(%rsp), %r9
  76. movq 40(%rsp), %r8
  77. movq 32(%rsp), %rdi
  78. movq 24(%rsp), %rsi
  79. movq 16(%rsp), %rdx
  80. movq 8(%rsp), %rcx
  81. movq (%rsp), %rax
  82. addq $0x38, %rsp
  83. retq
  84. END(mcount)
  85. ENTRY(ftrace_caller)
  86. /* taken from glibc */
  87. subq $0x38, %rsp
  88. movq %rax, (%rsp)
  89. movq %rcx, 8(%rsp)
  90. movq %rdx, 16(%rsp)
  91. movq %rsi, 24(%rsp)
  92. movq %rdi, 32(%rsp)
  93. movq %r8, 40(%rsp)
  94. movq %r9, 48(%rsp)
  95. movq 0x38(%rsp), %rdi
  96. movq 8(%rbp), %rsi
  97. subq $MCOUNT_INSN_SIZE, %rdi
  98. .globl ftrace_call
  99. ftrace_call:
  100. call ftrace_stub
  101. movq 48(%rsp), %r9
  102. movq 40(%rsp), %r8
  103. movq 32(%rsp), %rdi
  104. movq 24(%rsp), %rsi
  105. movq 16(%rsp), %rdx
  106. movq 8(%rsp), %rcx
  107. movq (%rsp), %rax
  108. addq $0x38, %rsp
  109. .globl ftrace_stub
  110. ftrace_stub:
  111. retq
  112. END(ftrace_caller)
  113. #else /* ! CONFIG_DYNAMIC_FTRACE */
  114. ENTRY(mcount)
  115. cmpq $ftrace_stub, ftrace_trace_function
  116. jnz trace
  117. .globl ftrace_stub
  118. ftrace_stub:
  119. retq
  120. trace:
  121. /* taken from glibc */
  122. subq $0x38, %rsp
  123. movq %rax, (%rsp)
  124. movq %rcx, 8(%rsp)
  125. movq %rdx, 16(%rsp)
  126. movq %rsi, 24(%rsp)
  127. movq %rdi, 32(%rsp)
  128. movq %r8, 40(%rsp)
  129. movq %r9, 48(%rsp)
  130. movq 0x38(%rsp), %rdi
  131. movq 8(%rbp), %rsi
  132. subq $MCOUNT_INSN_SIZE, %rdi
  133. call *ftrace_trace_function
  134. movq 48(%rsp), %r9
  135. movq 40(%rsp), %r8
  136. movq 32(%rsp), %rdi
  137. movq 24(%rsp), %rsi
  138. movq 16(%rsp), %rdx
  139. movq 8(%rsp), %rcx
  140. movq (%rsp), %rax
  141. addq $0x38, %rsp
  142. jmp ftrace_stub
  143. END(mcount)
  144. #endif /* CONFIG_DYNAMIC_FTRACE */
  145. #endif /* CONFIG_FTRACE */
  146. #ifndef CONFIG_PREEMPT
  147. #define retint_kernel retint_restore_args
  148. #endif
  149. #ifdef CONFIG_PARAVIRT
  150. ENTRY(native_usergs_sysret64)
  151. swapgs
  152. sysretq
  153. #endif /* CONFIG_PARAVIRT */
  154. .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
  155. #ifdef CONFIG_TRACE_IRQFLAGS
  156. bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
  157. jnc 1f
  158. TRACE_IRQS_ON
  159. 1:
  160. #endif
  161. .endm
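/*
 * Illustrative sketch (not part of this file): TRACE_IRQS_IRETQ above tests
 * bit 9 (IF) of the saved EFLAGS image to decide whether the return will
 * re-enable interrupts. The guarded-out user-space fragment below shows the
 * same "bt $9" idiom applied to a pushed RFLAGS value; the _start symbol and
 * the exit-status convention are made up for the example.
 */
#if 0
	.text
	.globl	_start
_start:
	pushfq				/* push RFLAGS onto the stack */
	btq	$9, (%rsp)		/* CF = bit 9 (IF) of the saved flags */
	setc	%dil			/* %dil = 1 if interrupts were enabled */
	movzbl	%dil, %edi		/* exit status = IF */
	addq	$8, %rsp
	movl	$60, %eax		/* __NR_exit */
	syscall
#endif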
  162. /*
  163. * C code is not supposed to know about undefined top of stack. Every time
  164. * a C function with a pt_regs argument is called from the SYSCALL-based
  165. * fast path, FIXUP_TOP_OF_STACK is needed.
  166. * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
  167. * manipulation.
  168. */
  169. /* %rsp:at FRAMEEND */
  170. .macro FIXUP_TOP_OF_STACK tmp
  171. movq %gs:pda_oldrsp,\tmp
  172. movq \tmp,RSP(%rsp)
  173. movq $__USER_DS,SS(%rsp)
  174. movq $__USER_CS,CS(%rsp)
  175. movq $-1,RCX(%rsp)
  176. movq R11(%rsp),\tmp /* get eflags */
  177. movq \tmp,EFLAGS(%rsp)
  178. .endm
  179. .macro RESTORE_TOP_OF_STACK tmp,offset=0
  180. movq RSP-\offset(%rsp),\tmp
  181. movq \tmp,%gs:pda_oldrsp
  182. movq EFLAGS-\offset(%rsp),\tmp
  183. movq \tmp,R11-\offset(%rsp)
  184. .endm
  185. .macro FAKE_STACK_FRAME child_rip
  186. /* push in order ss, rsp, eflags, cs, rip */
  187. xorl %eax, %eax
  188. pushq $__KERNEL_DS /* ss */
  189. CFI_ADJUST_CFA_OFFSET 8
  190. /*CFI_REL_OFFSET ss,0*/
  191. pushq %rax /* rsp */
  192. CFI_ADJUST_CFA_OFFSET 8
  193. CFI_REL_OFFSET rsp,0
  194. pushq $(1<<9) /* eflags - interrupts on */
  195. CFI_ADJUST_CFA_OFFSET 8
  196. /*CFI_REL_OFFSET rflags,0*/
  197. pushq $__KERNEL_CS /* cs */
  198. CFI_ADJUST_CFA_OFFSET 8
  199. /*CFI_REL_OFFSET cs,0*/
  200. pushq \child_rip /* rip */
  201. CFI_ADJUST_CFA_OFFSET 8
  202. CFI_REL_OFFSET rip,0
  203. pushq %rax /* orig rax */
  204. CFI_ADJUST_CFA_OFFSET 8
  205. .endm
  206. .macro UNFAKE_STACK_FRAME
  207. addq $8*6, %rsp
  208. CFI_ADJUST_CFA_OFFSET -(6*8)
  209. .endm
  210. .macro CFI_DEFAULT_STACK start=1
  211. .if \start
  212. CFI_STARTPROC simple
  213. CFI_SIGNAL_FRAME
  214. CFI_DEF_CFA rsp,SS+8
  215. .else
  216. CFI_DEF_CFA_OFFSET SS+8
  217. .endif
  218. CFI_REL_OFFSET r15,R15
  219. CFI_REL_OFFSET r14,R14
  220. CFI_REL_OFFSET r13,R13
  221. CFI_REL_OFFSET r12,R12
  222. CFI_REL_OFFSET rbp,RBP
  223. CFI_REL_OFFSET rbx,RBX
  224. CFI_REL_OFFSET r11,R11
  225. CFI_REL_OFFSET r10,R10
  226. CFI_REL_OFFSET r9,R9
  227. CFI_REL_OFFSET r8,R8
  228. CFI_REL_OFFSET rax,RAX
  229. CFI_REL_OFFSET rcx,RCX
  230. CFI_REL_OFFSET rdx,RDX
  231. CFI_REL_OFFSET rsi,RSI
  232. CFI_REL_OFFSET rdi,RDI
  233. CFI_REL_OFFSET rip,RIP
  234. /*CFI_REL_OFFSET cs,CS*/
  235. /*CFI_REL_OFFSET rflags,EFLAGS*/
  236. CFI_REL_OFFSET rsp,RSP
  237. /*CFI_REL_OFFSET ss,SS*/
  238. .endm
  239. /*
  240. * A newly forked process directly context switches into this.
  241. */
  242. /* rdi: prev */
  243. ENTRY(ret_from_fork)
  244. CFI_DEFAULT_STACK
  245. push kernel_eflags(%rip)
  246. CFI_ADJUST_CFA_OFFSET 8
  247. popf # reset kernel eflags
  248. CFI_ADJUST_CFA_OFFSET -8
  249. call schedule_tail
  250. GET_THREAD_INFO(%rcx)
  251. testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
  252. jnz rff_trace
  253. rff_action:
  254. RESTORE_REST
  255. testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
  256. je int_ret_from_sys_call
  257. testl $_TIF_IA32,TI_flags(%rcx)
  258. jnz int_ret_from_sys_call
  259. RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
  260. jmp ret_from_sys_call
  261. rff_trace:
  262. movq %rsp,%rdi
  263. call syscall_trace_leave
  264. GET_THREAD_INFO(%rcx)
  265. jmp rff_action
  266. CFI_ENDPROC
  267. END(ret_from_fork)
  268. /*
  269. * System call entry. Up to 6 arguments in registers are supported.
  270. *
  271. * SYSCALL does not save anything on the stack and does not change the
  272. * stack pointer.
  273. */
  274. /*
  275. * Register setup:
  276. * rax system call number
  277. * rdi arg0
  278. * rcx return address for syscall/sysret, C arg3
  279. * rsi arg1
  280. * rdx arg2
  281. * r10 arg3 (--> moved to rcx for C)
  282. * r8 arg4
  283. * r9 arg5
  284. * r11 eflags for syscall/sysret, temporary for C
  285. * r12-r15,rbp,rbx saved by C code, not touched.
  286. *
  287. * Interrupts are off on entry.
  288. * Only called from user space.
  289. *
  290. * XXX if we had a free scratch register we could save the RSP into the stack frame
  291. * and report it properly in ps. Unfortunately we don't.
  292. *
  293. * When the user can change the frames, always force IRET. That is because
  294. * it deals with non-canonical addresses better. SYSRET has trouble
  295. * with them due to bugs in both AMD and Intel CPUs.
  296. */
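/*
 * Illustrative sketch (not part of this file): the same register convention
 * as seen from user space. The guarded-out fragment below issues write(2)
 * and exit(2) directly via SYSCALL; the _start/msg/len symbols are made up
 * for the example (assemble and link standalone, e.g. with as and ld).
 * A 4th argument would go in %r10, because SYSCALL itself overwrites
 * %rcx (return RIP) and %r11 (RFLAGS).
 */
#if 0
	.data
msg:	.ascii	"hello via SYSCALL\n"
len =	. - msg

	.text
	.globl	_start
_start:
	movl	$1, %eax		/* rax: system call number (__NR_write) */
	movl	$1, %edi		/* rdi: arg0, fd = stdout */
	leaq	msg(%rip), %rsi		/* rsi: arg1, buffer */
	movl	$len, %edx		/* rdx: arg2, length */
	/* a 4th argument would be passed in %r10, not %rcx */
	syscall				/* clobbers rcx and r11, result in rax */
	movl	$60, %eax		/* __NR_exit */
	xorl	%edi, %edi		/* status 0 */
	syscall
#endif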
  297. ENTRY(system_call)
  298. CFI_STARTPROC simple
  299. CFI_SIGNAL_FRAME
  300. CFI_DEF_CFA rsp,PDA_STACKOFFSET
  301. CFI_REGISTER rip,rcx
  302. /*CFI_REGISTER rflags,r11*/
  303. SWAPGS_UNSAFE_STACK
  304. /*
  305. * A hypervisor implementation might want to use a label
  306. * after the swapgs, so that it can do the swapgs
  307. * for the guest and jump here on syscall.
  308. */
  309. ENTRY(system_call_after_swapgs)
  310. movq %rsp,%gs:pda_oldrsp
  311. movq %gs:pda_kernelstack,%rsp
  312. /*
  313. * No need to follow this irqs off/on section - it's straight
  314. * and short:
  315. */
  316. ENABLE_INTERRUPTS(CLBR_NONE)
  317. SAVE_ARGS 8,1
  318. movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
  319. movq %rcx,RIP-ARGOFFSET(%rsp)
  320. CFI_REL_OFFSET rip,RIP-ARGOFFSET
  321. GET_THREAD_INFO(%rcx)
  322. testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
  323. jnz tracesys
  324. system_call_fastpath:
  325. cmpq $__NR_syscall_max,%rax
  326. ja badsys
  327. movq %r10,%rcx
  328. call *sys_call_table(,%rax,8) # XXX: rip relative
  329. movq %rax,RAX-ARGOFFSET(%rsp)
  330. /*
  331. * Syscall return path ending with SYSRET (fast path)
  332. * Has incomplete stack frame and undefined top of stack.
  333. */
  334. ret_from_sys_call:
  335. movl $_TIF_ALLWORK_MASK,%edi
  336. /* edi: flagmask */
  337. sysret_check:
  338. LOCKDEP_SYS_EXIT
  339. GET_THREAD_INFO(%rcx)
  340. DISABLE_INTERRUPTS(CLBR_NONE)
  341. TRACE_IRQS_OFF
  342. movl TI_flags(%rcx),%edx
  343. andl %edi,%edx
  344. jnz sysret_careful
  345. CFI_REMEMBER_STATE
  346. /*
  347. * sysretq will re-enable interrupts:
  348. */
  349. TRACE_IRQS_ON
  350. movq RIP-ARGOFFSET(%rsp),%rcx
  351. CFI_REGISTER rip,rcx
  352. RESTORE_ARGS 0,-ARG_SKIP,1
  353. /*CFI_REGISTER rflags,r11*/
  354. movq %gs:pda_oldrsp, %rsp
  355. USERGS_SYSRET64
  356. CFI_RESTORE_STATE
  357. /* Handle reschedules */
  358. /* edx: work, edi: workmask */
  359. sysret_careful:
  360. bt $TIF_NEED_RESCHED,%edx
  361. jnc sysret_signal
  362. TRACE_IRQS_ON
  363. ENABLE_INTERRUPTS(CLBR_NONE)
  364. pushq %rdi
  365. CFI_ADJUST_CFA_OFFSET 8
  366. call schedule
  367. popq %rdi
  368. CFI_ADJUST_CFA_OFFSET -8
  369. jmp sysret_check
  370. /* Handle a signal */
  371. sysret_signal:
  372. TRACE_IRQS_ON
  373. ENABLE_INTERRUPTS(CLBR_NONE)
  374. #ifdef CONFIG_AUDITSYSCALL
  375. bt $TIF_SYSCALL_AUDIT,%edx
  376. jc sysret_audit
  377. #endif
  378. /* edx: work flags (arg3) */
  379. leaq do_notify_resume(%rip),%rax
  380. leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
  381. xorl %esi,%esi # oldset -> arg2
  382. call ptregscall_common
  383. movl $_TIF_WORK_MASK,%edi
  384. /* Use IRET because the user could have changed the frame. This
  385. works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
  386. DISABLE_INTERRUPTS(CLBR_NONE)
  387. TRACE_IRQS_OFF
  388. jmp int_with_check
  389. badsys:
  390. movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
  391. jmp ret_from_sys_call
  392. #ifdef CONFIG_AUDITSYSCALL
  393. /*
  394. * Fast path for syscall audit without full syscall trace.
  395. * We just call audit_syscall_entry() directly, and then
  396. * jump back to the normal fast path.
  397. */
  398. auditsys:
  399. movq %r10,%r9 /* 6th arg: 4th syscall arg */
  400. movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
  401. movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
  402. movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
  403. movq %rax,%rsi /* 2nd arg: syscall number */
  404. movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
  405. call audit_syscall_entry
  406. LOAD_ARGS 0 /* reload call-clobbered registers */
  407. jmp system_call_fastpath
  408. /*
  409. * Return fast path for syscall audit. Call audit_syscall_exit()
  410. * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
  411. * masked off.
  412. */
  413. sysret_audit:
  414. movq %rax,%rsi /* second arg, syscall return value */
  415. cmpq $0,%rax /* is it < 0? */
  416. setl %al /* 1 if so, 0 if not */
  417. movzbl %al,%edi /* zero-extend that into %edi */
  418. inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
  419. call audit_syscall_exit
  420. movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
  421. jmp sysret_check
  422. #endif /* CONFIG_AUDITSYSCALL */
  423. /* Do syscall tracing */
  424. tracesys:
  425. #ifdef CONFIG_AUDITSYSCALL
  426. testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
  427. jz auditsys
  428. #endif
  429. SAVE_REST
  430. movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
  431. FIXUP_TOP_OF_STACK %rdi
  432. movq %rsp,%rdi
  433. call syscall_trace_enter
  434. /*
  435. * Reload arg registers from stack in case ptrace changed them.
  436. * We don't reload %rax because syscall_trace_enter() returned
  437. * the value it wants us to use in the table lookup.
  438. */
  439. LOAD_ARGS ARGOFFSET, 1
  440. RESTORE_REST
  441. cmpq $__NR_syscall_max,%rax
  442. ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
  443. movq %r10,%rcx /* fixup for C */
  444. call *sys_call_table(,%rax,8)
  445. movq %rax,RAX-ARGOFFSET(%rsp)
  446. /* Use IRET because the user could have changed the frame */
  447. /*
  448. * Syscall return path ending with IRET.
  449. * Has correct top of stack, but partial stack frame.
  450. */
  451. .globl int_ret_from_sys_call
  452. .globl int_with_check
  453. int_ret_from_sys_call:
  454. DISABLE_INTERRUPTS(CLBR_NONE)
  455. TRACE_IRQS_OFF
  456. testl $3,CS-ARGOFFSET(%rsp)
  457. je retint_restore_args
  458. movl $_TIF_ALLWORK_MASK,%edi
  459. /* edi: mask to check */
  460. int_with_check:
  461. LOCKDEP_SYS_EXIT_IRQ
  462. GET_THREAD_INFO(%rcx)
  463. movl TI_flags(%rcx),%edx
  464. andl %edi,%edx
  465. jnz int_careful
  466. andl $~TS_COMPAT,TI_status(%rcx)
  467. jmp retint_swapgs
  468. /* Either reschedule or signal or syscall exit tracking needed. */
  469. /* First do a reschedule test. */
  470. /* edx: work, edi: workmask */
  471. int_careful:
  472. bt $TIF_NEED_RESCHED,%edx
  473. jnc int_very_careful
  474. TRACE_IRQS_ON
  475. ENABLE_INTERRUPTS(CLBR_NONE)
  476. pushq %rdi
  477. CFI_ADJUST_CFA_OFFSET 8
  478. call schedule
  479. popq %rdi
  480. CFI_ADJUST_CFA_OFFSET -8
  481. DISABLE_INTERRUPTS(CLBR_NONE)
  482. TRACE_IRQS_OFF
  483. jmp int_with_check
  484. /* handle signals and tracing -- both require a full stack frame */
  485. int_very_careful:
  486. TRACE_IRQS_ON
  487. ENABLE_INTERRUPTS(CLBR_NONE)
  488. SAVE_REST
  489. /* Check for syscall exit trace */
  490. testl $_TIF_WORK_SYSCALL_EXIT,%edx
  491. jz int_signal
  492. pushq %rdi
  493. CFI_ADJUST_CFA_OFFSET 8
  494. leaq 8(%rsp),%rdi # &ptregs -> arg1
  495. call syscall_trace_leave
  496. popq %rdi
  497. CFI_ADJUST_CFA_OFFSET -8
  498. andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
  499. jmp int_restore_rest
  500. int_signal:
  501. testl $_TIF_DO_NOTIFY_MASK,%edx
  502. jz 1f
  503. movq %rsp,%rdi # &ptregs -> arg1
  504. xorl %esi,%esi # oldset -> arg2
  505. call do_notify_resume
  506. 1: movl $_TIF_WORK_MASK,%edi
  507. int_restore_rest:
  508. RESTORE_REST
  509. DISABLE_INTERRUPTS(CLBR_NONE)
  510. TRACE_IRQS_OFF
  511. jmp int_with_check
  512. CFI_ENDPROC
  513. END(system_call)
  514. /*
  515. * Certain special system calls need to save a complete full stack frame.
  516. */
  517. .macro PTREGSCALL label,func,arg
  518. .globl \label
  519. \label:
  520. leaq \func(%rip),%rax
  521. leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
  522. jmp ptregscall_common
  523. END(\label)
  524. .endm
  525. CFI_STARTPROC
  526. PTREGSCALL stub_clone, sys_clone, %r8
  527. PTREGSCALL stub_fork, sys_fork, %rdi
  528. PTREGSCALL stub_vfork, sys_vfork, %rdi
  529. PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
  530. PTREGSCALL stub_iopl, sys_iopl, %rsi
  531. ENTRY(ptregscall_common)
  532. popq %r11
  533. CFI_ADJUST_CFA_OFFSET -8
  534. CFI_REGISTER rip, r11
  535. SAVE_REST
  536. movq %r11, %r15
  537. CFI_REGISTER rip, r15
  538. FIXUP_TOP_OF_STACK %r11
  539. call *%rax
  540. RESTORE_TOP_OF_STACK %r11
  541. movq %r15, %r11
  542. CFI_REGISTER rip, r11
  543. RESTORE_REST
  544. pushq %r11
  545. CFI_ADJUST_CFA_OFFSET 8
  546. CFI_REL_OFFSET rip, 0
  547. ret
  548. CFI_ENDPROC
  549. END(ptregscall_common)
  550. ENTRY(stub_execve)
  551. CFI_STARTPROC
  552. popq %r11
  553. CFI_ADJUST_CFA_OFFSET -8
  554. CFI_REGISTER rip, r11
  555. SAVE_REST
  556. FIXUP_TOP_OF_STACK %r11
  557. movq %rsp, %rcx
  558. call sys_execve
  559. RESTORE_TOP_OF_STACK %r11
  560. movq %rax,RAX(%rsp)
  561. RESTORE_REST
  562. jmp int_ret_from_sys_call
  563. CFI_ENDPROC
  564. END(stub_execve)
  565. /*
  566. * sigreturn is special because it needs to restore all registers on return.
  567. * This cannot be done with SYSRET, so use the IRET return path instead.
  568. */
  569. ENTRY(stub_rt_sigreturn)
  570. CFI_STARTPROC
  571. addq $8, %rsp
  572. CFI_ADJUST_CFA_OFFSET -8
  573. SAVE_REST
  574. movq %rsp,%rdi
  575. FIXUP_TOP_OF_STACK %r11
  576. call sys_rt_sigreturn
  577. movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
  578. RESTORE_REST
  579. jmp int_ret_from_sys_call
  580. CFI_ENDPROC
  581. END(stub_rt_sigreturn)
  582. /*
  583. * initial frame state for interrupts and exceptions
  584. */
  585. .macro _frame ref
  586. CFI_STARTPROC simple
  587. CFI_SIGNAL_FRAME
  588. CFI_DEF_CFA rsp,SS+8-\ref
  589. /*CFI_REL_OFFSET ss,SS-\ref*/
  590. CFI_REL_OFFSET rsp,RSP-\ref
  591. /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
  592. /*CFI_REL_OFFSET cs,CS-\ref*/
  593. CFI_REL_OFFSET rip,RIP-\ref
  594. .endm
  595. /* initial frame state for interrupts (and exceptions without error code) */
  596. #define INTR_FRAME _frame RIP
  597. /* initial frame state for exceptions with error code (and interrupts with
  598. vector already pushed) */
  599. #define XCPT_FRAME _frame ORIG_RAX
  600. /*
  601. * Interrupt entry/exit.
  602. *
  603. * Interrupt entry points save only callee-clobbered registers in the fast path.
  604. *
  605. * Entry runs with interrupts off.
  606. */
  607. /* 0(%rsp): interrupt number */
  608. .macro interrupt func
  609. cld
  610. SAVE_ARGS
  611. leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
  612. pushq %rbp
  613. /*
  614. * Save rbp twice: one is for marking the stack frame, as usual, and the
  615. * other is to fill pt_regs properly. This is because bx comes right
  616. * before the last saved register in that structure, and not bp. If the
  617. * base pointer were in the place bx is today, this would not be needed.
  618. */
  619. movq %rbp, -8(%rsp)
  620. CFI_ADJUST_CFA_OFFSET 8
  621. CFI_REL_OFFSET rbp, 0
  622. movq %rsp,%rbp
  623. CFI_DEF_CFA_REGISTER rbp
  624. testl $3,CS(%rdi)
  625. je 1f
  626. SWAPGS
  627. /* irqcount is used to check if a CPU is already on an interrupt
  628. stack or not. While this is essentially redundant with preempt_count,
  629. it is a little cheaper to use a separate counter in the PDA
  630. (short of moving irq_enter into assembly, which would be too
  631. much work) */
  632. 1: incl %gs:pda_irqcount
  633. cmoveq %gs:pda_irqstackptr,%rsp
  634. push %rbp # backlink for old unwinder
  635. /*
  636. * We entered an interrupt context - irqs are off:
  637. */
  638. TRACE_IRQS_OFF
  639. call \func
  640. .endm
  641. ENTRY(common_interrupt)
  642. XCPT_FRAME
  643. interrupt do_IRQ
  644. /* 0(%rsp): oldrsp-ARGOFFSET */
  645. ret_from_intr:
  646. DISABLE_INTERRUPTS(CLBR_NONE)
  647. TRACE_IRQS_OFF
  648. decl %gs:pda_irqcount
  649. leaveq
  650. CFI_DEF_CFA_REGISTER rsp
  651. CFI_ADJUST_CFA_OFFSET -8
  652. exit_intr:
  653. GET_THREAD_INFO(%rcx)
  654. testl $3,CS-ARGOFFSET(%rsp)
  655. je retint_kernel
  656. /* Interrupt came from user space */
  657. /*
  658. * Has a correct top of stack, but a partial stack frame
  659. * %rcx: thread info. Interrupts off.
  660. */
  661. retint_with_reschedule:
  662. movl $_TIF_WORK_MASK,%edi
  663. retint_check:
  664. LOCKDEP_SYS_EXIT_IRQ
  665. movl TI_flags(%rcx),%edx
  666. andl %edi,%edx
  667. CFI_REMEMBER_STATE
  668. jnz retint_careful
  669. retint_swapgs: /* return to user-space */
  670. /*
  671. * The iretq could re-enable interrupts:
  672. */
  673. DISABLE_INTERRUPTS(CLBR_ANY)
  674. TRACE_IRQS_IRETQ
  675. SWAPGS
  676. jmp restore_args
  677. retint_restore_args: /* return to kernel space */
  678. DISABLE_INTERRUPTS(CLBR_ANY)
  679. /*
  680. * The iretq could re-enable interrupts:
  681. */
  682. TRACE_IRQS_IRETQ
  683. restore_args:
  684. RESTORE_ARGS 0,8,0
  685. irq_return:
  686. INTERRUPT_RETURN
  687. .section __ex_table, "a"
  688. .quad irq_return, bad_iret
  689. .previous
  690. #ifdef CONFIG_PARAVIRT
  691. ENTRY(native_iret)
  692. iretq
  693. .section __ex_table,"a"
  694. .quad native_iret, bad_iret
  695. .previous
  696. #endif
  697. .section .fixup,"ax"
  698. bad_iret:
  699. /*
  700. * The iret traps when the %cs or %ss being restored is bogus.
  701. * We've lost the original trap vector and error code.
  702. * #GPF is the most likely one to get for an invalid selector.
  703. * So pretend we completed the iret and took the #GPF in user mode.
  704. *
  705. * We are now running with the kernel GS after exception recovery.
  706. * But error_entry expects us to have user GS to match the user %cs,
  707. * so swap back.
  708. */
  709. pushq $0
  710. SWAPGS
  711. jmp general_protection
  712. .previous
  713. /* edi: workmask, edx: work */
  714. retint_careful:
  715. CFI_RESTORE_STATE
  716. bt $TIF_NEED_RESCHED,%edx
  717. jnc retint_signal
  718. TRACE_IRQS_ON
  719. ENABLE_INTERRUPTS(CLBR_NONE)
  720. pushq %rdi
  721. CFI_ADJUST_CFA_OFFSET 8
  722. call schedule
  723. popq %rdi
  724. CFI_ADJUST_CFA_OFFSET -8
  725. GET_THREAD_INFO(%rcx)
  726. DISABLE_INTERRUPTS(CLBR_NONE)
  727. TRACE_IRQS_OFF
  728. jmp retint_check
  729. retint_signal:
  730. testl $_TIF_DO_NOTIFY_MASK,%edx
  731. jz retint_swapgs
  732. TRACE_IRQS_ON
  733. ENABLE_INTERRUPTS(CLBR_NONE)
  734. SAVE_REST
  735. movq $-1,ORIG_RAX(%rsp)
  736. xorl %esi,%esi # oldset
  737. movq %rsp,%rdi # &pt_regs
  738. call do_notify_resume
  739. RESTORE_REST
  740. DISABLE_INTERRUPTS(CLBR_NONE)
  741. TRACE_IRQS_OFF
  742. GET_THREAD_INFO(%rcx)
  743. jmp retint_with_reschedule
  744. #ifdef CONFIG_PREEMPT
  745. /* Returning to kernel space. Check if we need preemption */
  746. /* rcx: threadinfo. interrupts off. */
  747. ENTRY(retint_kernel)
  748. cmpl $0,TI_preempt_count(%rcx)
  749. jnz retint_restore_args
  750. bt $TIF_NEED_RESCHED,TI_flags(%rcx)
  751. jnc retint_restore_args
  752. bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
  753. jnc retint_restore_args
  754. call preempt_schedule_irq
  755. jmp exit_intr
  756. #endif
  757. CFI_ENDPROC
  758. END(common_interrupt)
  759. /*
  760. * APIC interrupts.
  761. */
  762. .macro apicinterrupt num,func
  763. INTR_FRAME
  764. pushq $~(\num)
  765. CFI_ADJUST_CFA_OFFSET 8
  766. interrupt \func
  767. jmp ret_from_intr
  768. CFI_ENDPROC
  769. .endm
  770. ENTRY(thermal_interrupt)
  771. apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
  772. END(thermal_interrupt)
  773. ENTRY(threshold_interrupt)
  774. apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
  775. END(threshold_interrupt)
  776. #ifdef CONFIG_SMP
  777. ENTRY(reschedule_interrupt)
  778. apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
  779. END(reschedule_interrupt)
  780. .macro INVALIDATE_ENTRY num
  781. ENTRY(invalidate_interrupt\num)
  782. apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
  783. END(invalidate_interrupt\num)
  784. .endm
  785. INVALIDATE_ENTRY 0
  786. INVALIDATE_ENTRY 1
  787. INVALIDATE_ENTRY 2
  788. INVALIDATE_ENTRY 3
  789. INVALIDATE_ENTRY 4
  790. INVALIDATE_ENTRY 5
  791. INVALIDATE_ENTRY 6
  792. INVALIDATE_ENTRY 7
  793. ENTRY(call_function_interrupt)
  794. apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
  795. END(call_function_interrupt)
  796. ENTRY(call_function_single_interrupt)
  797. apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
  798. END(call_function_single_interrupt)
  799. ENTRY(irq_move_cleanup_interrupt)
  800. apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
  801. END(irq_move_cleanup_interrupt)
  802. #endif
  803. ENTRY(apic_timer_interrupt)
  804. apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
  805. END(apic_timer_interrupt)
  806. ENTRY(uv_bau_message_intr1)
  807. apicinterrupt 220,uv_bau_message_interrupt
  808. END(uv_bau_message_intr1)
  809. ENTRY(error_interrupt)
  810. apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
  811. END(error_interrupt)
  812. ENTRY(spurious_interrupt)
  813. apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
  814. END(spurious_interrupt)
  815. /*
  816. * Exception entry points.
  817. */
  818. .macro zeroentry sym
  819. INTR_FRAME
  820. PARAVIRT_ADJUST_EXCEPTION_FRAME
  821. pushq $0 /* push error code/oldrax */
  822. CFI_ADJUST_CFA_OFFSET 8
  823. pushq %rax /* push real oldrax to the rdi slot */
  824. CFI_ADJUST_CFA_OFFSET 8
  825. CFI_REL_OFFSET rax,0
  826. leaq \sym(%rip),%rax
  827. jmp error_entry
  828. CFI_ENDPROC
  829. .endm
  830. .macro errorentry sym
  831. XCPT_FRAME
  832. PARAVIRT_ADJUST_EXCEPTION_FRAME
  833. pushq %rax
  834. CFI_ADJUST_CFA_OFFSET 8
  835. CFI_REL_OFFSET rax,0
  836. leaq \sym(%rip),%rax
  837. jmp error_entry
  838. CFI_ENDPROC
  839. .endm
  840. /* error code is on the stack already */
  841. /* handle NMI-like exceptions that can happen everywhere */
  842. .macro paranoidentry sym, ist=0, irqtrace=1
  843. SAVE_ALL
  844. cld
  845. movl $1,%ebx
  846. movl $MSR_GS_BASE,%ecx
  847. rdmsr
  848. testl %edx,%edx
  849. js 1f
  850. SWAPGS
  851. xorl %ebx,%ebx
  852. 1:
  853. .if \ist
  854. movq %gs:pda_data_offset, %rbp
  855. .endif
  856. movq %rsp,%rdi
  857. movq ORIG_RAX(%rsp),%rsi
  858. movq $-1,ORIG_RAX(%rsp)
  859. .if \ist
  860. subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  861. .endif
  862. call \sym
  863. .if \ist
  864. addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  865. .endif
  866. DISABLE_INTERRUPTS(CLBR_NONE)
  867. .if \irqtrace
  868. TRACE_IRQS_OFF
  869. .endif
  870. .endm
  871. /*
  872. * "Paranoid" exit path from exception stack.
  873. * Paranoid because this is used by NMIs and cannot take
  874. * any kernel state for granted.
  875. * We don't do kernel preemption checks here, because only
  876. * NMI should be common and it does not enable IRQs and
  877. * cannot get reschedule ticks.
  878. *
  879. * "trace" is 0 for the NMI handler only, because irq-tracing
  880. * is fundamentally NMI-unsafe. (we cannot change the soft and
  881. * hard flags at once, atomically)
  882. */
  883. .macro paranoidexit trace=1
  884. /* ebx: no swapgs flag */
  885. paranoid_exit\trace:
  886. testl %ebx,%ebx /* swapgs needed? */
  887. jnz paranoid_restore\trace
  888. testl $3,CS(%rsp)
  889. jnz paranoid_userspace\trace
  890. paranoid_swapgs\trace:
  891. .if \trace
  892. TRACE_IRQS_IRETQ 0
  893. .endif
  894. SWAPGS_UNSAFE_STACK
  895. paranoid_restore\trace:
  896. RESTORE_ALL 8
  897. jmp irq_return
  898. paranoid_userspace\trace:
  899. GET_THREAD_INFO(%rcx)
  900. movl TI_flags(%rcx),%ebx
  901. andl $_TIF_WORK_MASK,%ebx
  902. jz paranoid_swapgs\trace
  903. movq %rsp,%rdi /* &pt_regs */
  904. call sync_regs
  905. movq %rax,%rsp /* switch stack for scheduling */
  906. testl $_TIF_NEED_RESCHED,%ebx
  907. jnz paranoid_schedule\trace
  908. movl %ebx,%edx /* arg3: thread flags */
  909. .if \trace
  910. TRACE_IRQS_ON
  911. .endif
  912. ENABLE_INTERRUPTS(CLBR_NONE)
  913. xorl %esi,%esi /* arg2: oldset */
  914. movq %rsp,%rdi /* arg1: &pt_regs */
  915. call do_notify_resume
  916. DISABLE_INTERRUPTS(CLBR_NONE)
  917. .if \trace
  918. TRACE_IRQS_OFF
  919. .endif
  920. jmp paranoid_userspace\trace
  921. paranoid_schedule\trace:
  922. .if \trace
  923. TRACE_IRQS_ON
  924. .endif
  925. ENABLE_INTERRUPTS(CLBR_ANY)
  926. call schedule
  927. DISABLE_INTERRUPTS(CLBR_ANY)
  928. .if \trace
  929. TRACE_IRQS_OFF
  930. .endif
  931. jmp paranoid_userspace\trace
  932. CFI_ENDPROC
  933. .endm
  934. /*
  935. * Exception entry point. This expects an error code/orig_rax on the stack
  936. * and the exception handler in %rax.
  937. */
  938. KPROBE_ENTRY(error_entry)
  939. _frame RDI
  940. CFI_REL_OFFSET rax,0
  941. /* rdi slot contains rax, oldrax contains error code */
  942. cld
  943. subq $14*8,%rsp
  944. CFI_ADJUST_CFA_OFFSET (14*8)
  945. movq %rsi,13*8(%rsp)
  946. CFI_REL_OFFSET rsi,RSI
  947. movq 14*8(%rsp),%rsi /* load rax from rdi slot */
  948. CFI_REGISTER rax,rsi
  949. movq %rdx,12*8(%rsp)
  950. CFI_REL_OFFSET rdx,RDX
  951. movq %rcx,11*8(%rsp)
  952. CFI_REL_OFFSET rcx,RCX
  953. movq %rsi,10*8(%rsp) /* store rax */
  954. CFI_REL_OFFSET rax,RAX
  955. movq %r8, 9*8(%rsp)
  956. CFI_REL_OFFSET r8,R8
  957. movq %r9, 8*8(%rsp)
  958. CFI_REL_OFFSET r9,R9
  959. movq %r10,7*8(%rsp)
  960. CFI_REL_OFFSET r10,R10
  961. movq %r11,6*8(%rsp)
  962. CFI_REL_OFFSET r11,R11
  963. movq %rbx,5*8(%rsp)
  964. CFI_REL_OFFSET rbx,RBX
  965. movq %rbp,4*8(%rsp)
  966. CFI_REL_OFFSET rbp,RBP
  967. movq %r12,3*8(%rsp)
  968. CFI_REL_OFFSET r12,R12
  969. movq %r13,2*8(%rsp)
  970. CFI_REL_OFFSET r13,R13
  971. movq %r14,1*8(%rsp)
  972. CFI_REL_OFFSET r14,R14
  973. movq %r15,(%rsp)
  974. CFI_REL_OFFSET r15,R15
  975. xorl %ebx,%ebx
  976. testl $3,CS(%rsp)
  977. je error_kernelspace
  978. error_swapgs:
  979. SWAPGS
  980. error_sti:
  981. movq %rdi,RDI(%rsp)
  982. CFI_REL_OFFSET rdi,RDI
  983. movq %rsp,%rdi
  984. movq ORIG_RAX(%rsp),%rsi /* get error code */
  985. movq $-1,ORIG_RAX(%rsp)
  986. call *%rax
  987. /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
  988. error_exit:
  989. movl %ebx,%eax
  990. RESTORE_REST
  991. DISABLE_INTERRUPTS(CLBR_NONE)
  992. TRACE_IRQS_OFF
  993. GET_THREAD_INFO(%rcx)
  994. testl %eax,%eax
  995. jne retint_kernel
  996. LOCKDEP_SYS_EXIT_IRQ
  997. movl TI_flags(%rcx),%edx
  998. movl $_TIF_WORK_MASK,%edi
  999. andl %edi,%edx
  1000. jnz retint_careful
  1001. jmp retint_swapgs
  1002. CFI_ENDPROC
  1003. error_kernelspace:
  1004. incl %ebx
  1005. /* There are two places in the kernel that can potentially fault with
  1006. usergs. Handle them here. The exception handlers after
  1007. iret run with kernel gs again, so don't set the user space flag.
  1008. B stepping K8s sometimes report a truncated RIP for IRET
  1009. exceptions returning to compat mode. Check for these here too. */
  1010. leaq irq_return(%rip),%rcx
  1011. cmpq %rcx,RIP(%rsp)
  1012. je error_swapgs
  1013. movl %ecx,%ecx /* zero extend */
  1014. cmpq %rcx,RIP(%rsp)
  1015. je error_swapgs
  1016. cmpq $gs_change,RIP(%rsp)
  1017. je error_swapgs
  1018. jmp error_sti
  1019. KPROBE_END(error_entry)
  1020. /* Reload gs selector with exception handling */
  1021. /* edi: new selector */
  1022. ENTRY(native_load_gs_index)
  1023. CFI_STARTPROC
  1024. pushf
  1025. CFI_ADJUST_CFA_OFFSET 8
  1026. DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
  1027. SWAPGS
  1028. gs_change:
  1029. movl %edi,%gs
  1030. 2: mfence /* workaround */
  1031. SWAPGS
  1032. popf
  1033. CFI_ADJUST_CFA_OFFSET -8
  1034. ret
  1035. CFI_ENDPROC
  1036. ENDPROC(native_load_gs_index)
  1037. .section __ex_table,"a"
  1038. .align 8
  1039. .quad gs_change,bad_gs
  1040. .previous
  1041. .section .fixup,"ax"
  1042. /* running with kernelgs */
  1043. bad_gs:
  1044. SWAPGS /* switch back to user gs */
  1045. xorl %eax,%eax
  1046. movl %eax,%gs
  1047. jmp 2b
  1048. .previous
  1049. /*
  1050. * Create a kernel thread.
  1051. *
  1052. * C extern interface:
  1053. * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
  1054. *
  1055. * asm input arguments:
  1056. * rdi: fn, rsi: arg, rdx: flags
  1057. */
  1058. ENTRY(kernel_thread)
  1059. CFI_STARTPROC
  1060. FAKE_STACK_FRAME $child_rip
  1061. SAVE_ALL
  1062. # rdi: flags, rsi: usp, rdx: will be &pt_regs
  1063. movq %rdx,%rdi
  1064. orq kernel_thread_flags(%rip),%rdi
  1065. movq $-1, %rsi
  1066. movq %rsp, %rdx
  1067. xorl %r8d,%r8d
  1068. xorl %r9d,%r9d
  1069. # clone now
  1070. call do_fork
  1071. movq %rax,RAX(%rsp)
  1072. xorl %edi,%edi
  1073. /*
  1074. * It isn't worth checking for a reschedule here,
  1075. * so internally to the x86_64 port you can rely on kernel_thread()
  1076. * not rescheduling the child before returning; this avoids the need
  1077. * for hacks, for example to fork off the per-CPU idle tasks.
  1078. * [Hopefully no generic code relies on the reschedule -AK]
  1079. */
  1080. RESTORE_ALL
  1081. UNFAKE_STACK_FRAME
  1082. ret
  1083. CFI_ENDPROC
  1084. ENDPROC(kernel_thread)
  1085. child_rip:
  1086. pushq $0 # fake return address
  1087. CFI_STARTPROC
  1088. /*
  1089. * Here we are in the child and the registers are set as they were
  1090. * at kernel_thread() invocation in the parent.
  1091. */
  1092. movq %rdi, %rax
  1093. movq %rsi, %rdi
  1094. call *%rax
  1095. # exit
  1096. mov %eax, %edi
  1097. call do_exit
  1098. CFI_ENDPROC
  1099. ENDPROC(child_rip)
  1100. /*
  1101. * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
  1102. *
  1103. * C extern interface:
  1104. * extern long execve(char *name, char **argv, char **envp)
  1105. *
  1106. * asm input arguments:
  1107. * rdi: name, rsi: argv, rdx: envp
  1108. *
  1109. * We want to fall back into:
  1110. * extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs *regs)
  1111. *
  1112. * do_sys_execve asm fallback arguments:
  1113. * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
  1114. */
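/*
 * Illustrative sketch (not part of this file): the user-space view of the
 * execve arguments documented above (rdi: name, rsi: argv, rdx: envp),
 * issued directly via SYSCALL. The path and the _start/path/argv/envp
 * symbols are made up for the example; 59 and 60 are the x86-64
 * __NR_execve and __NR_exit numbers.
 */
#if 0
	.data
path:	.asciz	"/bin/echo"
arg1:	.asciz	"hello"
	.align	8
argv:	.quad	path, arg1, 0		/* NULL-terminated argv */
envp:	.quad	0			/* empty environment */

	.text
	.globl	_start
_start:
	leaq	path(%rip), %rdi	/* rdi: name */
	leaq	argv(%rip), %rsi	/* rsi: argv */
	leaq	envp(%rip), %rdx	/* rdx: envp */
	movl	$59, %eax		/* __NR_execve */
	syscall
	movl	$60, %eax		/* only reached if execve failed */
	movl	$1, %edi		/* exit(1) */
	syscall
#endif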
  1115. ENTRY(kernel_execve)
  1116. CFI_STARTPROC
  1117. FAKE_STACK_FRAME $0
  1118. SAVE_ALL
  1119. movq %rsp,%rcx
  1120. call sys_execve
  1121. movq %rax, RAX(%rsp)
  1122. RESTORE_REST
  1123. testq %rax,%rax
  1124. je int_ret_from_sys_call
  1125. RESTORE_ARGS
  1126. UNFAKE_STACK_FRAME
  1127. ret
  1128. CFI_ENDPROC
  1129. ENDPROC(kernel_execve)
  1130. KPROBE_ENTRY(page_fault)
  1131. errorentry do_page_fault
  1132. KPROBE_END(page_fault)
  1133. ENTRY(coprocessor_error)
  1134. zeroentry do_coprocessor_error
  1135. END(coprocessor_error)
  1136. ENTRY(simd_coprocessor_error)
  1137. zeroentry do_simd_coprocessor_error
  1138. END(simd_coprocessor_error)
  1139. ENTRY(device_not_available)
  1140. zeroentry math_state_restore
  1141. END(device_not_available)
  1142. /* runs on exception stack */
  1143. KPROBE_ENTRY(debug)
  1144. INTR_FRAME
  1145. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1146. pushq $0
  1147. CFI_ADJUST_CFA_OFFSET 8
  1148. paranoidentry do_debug, DEBUG_STACK
  1149. paranoidexit
  1150. KPROBE_END(debug)
  1151. /* runs on exception stack */
  1152. KPROBE_ENTRY(nmi)
  1153. INTR_FRAME
  1154. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1155. pushq $-1
  1156. CFI_ADJUST_CFA_OFFSET 8
  1157. paranoidentry do_nmi, 0, 0
  1158. #ifdef CONFIG_TRACE_IRQFLAGS
  1159. paranoidexit 0
  1160. #else
  1161. jmp paranoid_exit1
  1162. CFI_ENDPROC
  1163. #endif
  1164. KPROBE_END(nmi)
  1165. KPROBE_ENTRY(int3)
  1166. INTR_FRAME
  1167. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1168. pushq $0
  1169. CFI_ADJUST_CFA_OFFSET 8
  1170. paranoidentry do_int3, DEBUG_STACK
  1171. jmp paranoid_exit1
  1172. CFI_ENDPROC
  1173. KPROBE_END(int3)
  1174. ENTRY(overflow)
  1175. zeroentry do_overflow
  1176. END(overflow)
  1177. ENTRY(bounds)
  1178. zeroentry do_bounds
  1179. END(bounds)
  1180. ENTRY(invalid_op)
  1181. zeroentry do_invalid_op
  1182. END(invalid_op)
  1183. ENTRY(coprocessor_segment_overrun)
  1184. zeroentry do_coprocessor_segment_overrun
  1185. END(coprocessor_segment_overrun)
  1186. /* runs on exception stack */
  1187. ENTRY(double_fault)
  1188. XCPT_FRAME
  1189. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1190. paranoidentry do_double_fault
  1191. jmp paranoid_exit1
  1192. CFI_ENDPROC
  1193. END(double_fault)
  1194. ENTRY(invalid_TSS)
  1195. errorentry do_invalid_TSS
  1196. END(invalid_TSS)
  1197. ENTRY(segment_not_present)
  1198. errorentry do_segment_not_present
  1199. END(segment_not_present)
  1200. /* runs on exception stack */
  1201. ENTRY(stack_segment)
  1202. XCPT_FRAME
  1203. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1204. paranoidentry do_stack_segment
  1205. jmp paranoid_exit1
  1206. CFI_ENDPROC
  1207. END(stack_segment)
  1208. KPROBE_ENTRY(general_protection)
  1209. errorentry do_general_protection
  1210. KPROBE_END(general_protection)
  1211. ENTRY(alignment_check)
  1212. errorentry do_alignment_check
  1213. END(alignment_check)
  1214. ENTRY(divide_error)
  1215. zeroentry do_divide_error
  1216. END(divide_error)
  1217. ENTRY(spurious_interrupt_bug)
  1218. zeroentry do_spurious_interrupt_bug
  1219. END(spurious_interrupt_bug)
  1220. #ifdef CONFIG_X86_MCE
  1221. /* runs on exception stack */
  1222. ENTRY(machine_check)
  1223. INTR_FRAME
  1224. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1225. pushq $0
  1226. CFI_ADJUST_CFA_OFFSET 8
  1227. paranoidentry do_machine_check
  1228. jmp paranoid_exit1
  1229. CFI_ENDPROC
  1230. END(machine_check)
  1231. #endif
  1232. /* Call softirq on interrupt stack. Interrupts are off. */
  1233. ENTRY(call_softirq)
  1234. CFI_STARTPROC
  1235. push %rbp
  1236. CFI_ADJUST_CFA_OFFSET 8
  1237. CFI_REL_OFFSET rbp,0
  1238. mov %rsp,%rbp
  1239. CFI_DEF_CFA_REGISTER rbp
  1240. incl %gs:pda_irqcount
  1241. cmove %gs:pda_irqstackptr,%rsp
  1242. push %rbp # backlink for old unwinder
  1243. call __do_softirq
  1244. leaveq
  1245. CFI_DEF_CFA_REGISTER rsp
  1246. CFI_ADJUST_CFA_OFFSET -8
  1247. decl %gs:pda_irqcount
  1248. ret
  1249. CFI_ENDPROC
  1250. ENDPROC(call_softirq)
  1251. KPROBE_ENTRY(ignore_sysret)
  1252. CFI_STARTPROC
  1253. mov $-ENOSYS,%eax
  1254. sysret
  1255. CFI_ENDPROC
  1256. ENDPROC(ignore_sysret)
  1257. #ifdef CONFIG_XEN
  1258. ENTRY(xen_hypervisor_callback)
  1259. zeroentry xen_do_hypervisor_callback
  1260. END(xen_hypervisor_callback)
  1261. /*
  1262. # A note on the "critical region" in our callback handler.
  1263. # We want to avoid stacking callback handlers due to events occurring
  1264. # during handling of the last event. To do this, we keep events disabled
  1265. # until we've done all processing. HOWEVER, we must enable events before
  1266. # popping the stack frame (can't be done atomically) and so it would still
  1267. # be possible to get enough handler activations to overflow the stack.
  1268. # Although unlikely, bugs of that kind are hard to track down, so we'd
  1269. # like to avoid the possibility.
  1270. # So, on entry to the handler we detect whether we interrupted an
  1271. # existing activation in its critical region -- if so, we pop the current
  1272. # activation and restart the handler using the previous one.
  1273. */
  1274. ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
  1275. CFI_STARTPROC
  1276. /* Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
  1277. see the correct pointer to the pt_regs */
  1278. movq %rdi, %rsp # we don't return, adjust the stack frame
  1279. CFI_ENDPROC
  1280. CFI_DEFAULT_STACK
  1281. 11: incl %gs:pda_irqcount
  1282. movq %rsp,%rbp
  1283. CFI_DEF_CFA_REGISTER rbp
  1284. cmovzq %gs:pda_irqstackptr,%rsp
  1285. pushq %rbp # backlink for old unwinder
  1286. call xen_evtchn_do_upcall
  1287. popq %rsp
  1288. CFI_DEF_CFA_REGISTER rsp
  1289. decl %gs:pda_irqcount
  1290. jmp error_exit
  1291. CFI_ENDPROC
  1292. END(xen_do_hypervisor_callback)
  1293. /*
  1294. # Hypervisor uses this for application faults while it executes.
  1295. # We get here for two reasons:
  1296. # 1. Fault while reloading DS, ES, FS or GS
  1297. # 2. Fault while executing IRET
  1298. # Category 1 we do not need to fix up as Xen has already reloaded all segment
  1299. # registers that could be reloaded and zeroed the others.
  1300. # Category 2 we fix up by killing the current process. We cannot use the
  1301. # normal Linux return path in this case because if we use the IRET hypercall
  1302. # to pop the stack frame we end up in an infinite loop of failsafe callbacks.
  1303. # We distinguish between categories by comparing each saved segment register
  1304. # with its current contents: any discrepancy means we are in category 1.
  1305. */
  1306. ENTRY(xen_failsafe_callback)
  1307. framesz = (RIP-0x30) /* workaround buggy gas */
  1308. _frame framesz
  1309. CFI_REL_OFFSET rcx, 0
  1310. CFI_REL_OFFSET r11, 8
  1311. movw %ds,%cx
  1312. cmpw %cx,0x10(%rsp)
  1313. CFI_REMEMBER_STATE
  1314. jne 1f
  1315. movw %es,%cx
  1316. cmpw %cx,0x18(%rsp)
  1317. jne 1f
  1318. movw %fs,%cx
  1319. cmpw %cx,0x20(%rsp)
  1320. jne 1f
  1321. movw %gs,%cx
  1322. cmpw %cx,0x28(%rsp)
  1323. jne 1f
  1324. /* All segments match their saved values => Category 2 (Bad IRET). */
  1325. movq (%rsp),%rcx
  1326. CFI_RESTORE rcx
  1327. movq 8(%rsp),%r11
  1328. CFI_RESTORE r11
  1329. addq $0x30,%rsp
  1330. CFI_ADJUST_CFA_OFFSET -0x30
  1331. pushq $0
  1332. CFI_ADJUST_CFA_OFFSET 8
  1333. pushq %r11
  1334. CFI_ADJUST_CFA_OFFSET 8
  1335. pushq %rcx
  1336. CFI_ADJUST_CFA_OFFSET 8
  1337. jmp general_protection
  1338. CFI_RESTORE_STATE
  1339. 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
  1340. movq (%rsp),%rcx
  1341. CFI_RESTORE rcx
  1342. movq 8(%rsp),%r11
  1343. CFI_RESTORE r11
  1344. addq $0x30,%rsp
  1345. CFI_ADJUST_CFA_OFFSET -0x30
  1346. pushq $0
  1347. CFI_ADJUST_CFA_OFFSET 8
  1348. SAVE_ALL
  1349. jmp error_exit
  1350. CFI_ENDPROC
  1351. END(xen_failsafe_callback)
  1352. #endif /* CONFIG_XEN */