/*
* linux/arch/x86_64/entry.S
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
*/
/*
* entry.S contains the system-call and fault low-level handling routines.
*
* NOTE: This code handles signal-recognition, which happens every time
* after an interrupt and after each system call.
*
* Normal syscalls and interrupts don't save a full stack frame; this is
* only done for syscall tracing, signals or fork/exec et al.
*
* A note on terminology:
* - top of stack: Architecture-defined interrupt frame from SS to RIP
* at the top of the kernel process stack.
* - partial stack frame: partially saved registers up to R11.
* - full stack frame: Like partial stack frame, but all registers saved.
*
* Some macro usage:
* - CFI macros are used to generate dwarf2 unwind information for better
* backtraces. They don't change any code.
* - SAVE_ALL/RESTORE_ALL - Save/restore all registers
* - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
* There are unfortunately lots of special cases where some registers are
* not touched. The macro is a big mess that should be cleaned up.
* - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
* Gives a full stack frame.
* - ENTRY/END - Define functions in the symbol table.
* - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
* frame that is otherwise undefined after a SYSCALL.
* - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
* - errorentry/paranoidentry/zeroentry - Define exception entry points.
*/
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>
.code64
#ifdef CONFIG_FTRACE
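/*
 * The compiler emits a call to mcount at the start of every traced
 * function, so the function's arguments are still live in the usual
 * argument registers when we get here. The stubs below therefore spill
 * %rax and %rdi, %rsi, %rdx, %rcx, %r8, %r9 around the call into the
 * tracer and restore them before returning.
 */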
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
subq $0x38, %rsp
movq %rax, (%rsp)
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
movq %rsi, 24(%rsp)
movq %rdi, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
movq 0x38(%rsp), %rdi
subq $MCOUNT_INSN_SIZE, %rdi
.globl mcount_call
mcount_call:
call ftrace_stub
movq 48(%rsp), %r9
movq 40(%rsp), %r8
movq 32(%rsp), %rdi
movq 24(%rsp), %rsi
movq 16(%rsp), %rdx
movq 8(%rsp), %rcx
movq (%rsp), %rax
addq $0x38, %rsp
retq
END(mcount)
ENTRY(ftrace_caller)
/* taken from glibc */
subq $0x38, %rsp
movq %rax, (%rsp)
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
movq %rsi, 24(%rsp)
movq %rdi, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
movq 0x38(%rsp), %rdi
movq 8(%rbp), %rsi
subq $MCOUNT_INSN_SIZE, %rdi
.globl ftrace_call
ftrace_call:
call ftrace_stub
movq 48(%rsp), %r9
movq 40(%rsp), %r8
movq 32(%rsp), %rdi
movq 24(%rsp), %rsi
movq 16(%rsp), %rdx
movq 8(%rsp), %rcx
movq (%rsp), %rax
addq $0x38, %rsp
.globl ftrace_stub
ftrace_stub:
retq
END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
cmpq $ftrace_stub, ftrace_trace_function
jnz trace
.globl ftrace_stub
ftrace_stub:
retq
trace:
/* taken from glibc */
subq $0x38, %rsp
movq %rax, (%rsp)
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
movq %rsi, 24(%rsp)
movq %rdi, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
movq 0x38(%rsp), %rdi
movq 8(%rbp), %rsi
subq $MCOUNT_INSN_SIZE, %rdi
call *ftrace_trace_function
movq 48(%rsp), %r9
movq 40(%rsp), %r8
movq 32(%rsp), %rdi
movq 24(%rsp), %rsi
movq 16(%rsp), %rdx
movq 8(%rsp), %rcx
movq (%rsp), %rax
addq $0x38, %rsp
jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FTRACE */
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
swapgs
sysretq
#endif /* CONFIG_PARAVIRT */
.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
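/*
 * If the EFLAGS image about to be restored by iretq has IF (bit 9) set,
 * the iretq will re-enable interrupts, so let the irq-flag tracing code
 * know beforehand.
 */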
#ifdef CONFIG_TRACE_IRQFLAGS
bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
jnc 1f
TRACE_IRQS_ON
1:
#endif
.endm
/*
* C code is not supposed to know about undefined top of stack. Every time
* a C function with a pt_regs argument is called from the SYSCALL based
* fast path FIXUP_TOP_OF_STACK is needed.
* RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
* manipulation.
*/
/* %rsp:at FRAMEEND */
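/*
 * SYSCALL stashed the user %rsp in pda_oldrsp and clobbered %rcx/%r11
 * with the return RIP/RFLAGS instead of pushing a hardware frame, so the
 * SS, RSP, CS and EFLAGS slots of pt_regs are reconstructed here by hand.
 * RCX is stored as -1; its original user value was destroyed by the
 * SYSCALL instruction itself.
 */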
.macro FIXUP_TOP_OF_STACK tmp
movq %gs:pda_oldrsp,\tmp
movq \tmp,RSP(%rsp)
movq $__USER_DS,SS(%rsp)
movq $__USER_CS,CS(%rsp)
movq $-1,RCX(%rsp)
movq R11(%rsp),\tmp /* get eflags */
movq \tmp,EFLAGS(%rsp)
.endm
.macro RESTORE_TOP_OF_STACK tmp,offset=0
movq RSP-\offset(%rsp),\tmp
movq \tmp,%gs:pda_oldrsp
movq EFLAGS-\offset(%rsp),\tmp
movq \tmp,R11-\offset(%rsp)
.endm
.macro FAKE_STACK_FRAME child_rip
/* push in order ss, rsp, eflags, cs, rip */
xorl %eax, %eax
pushq $__KERNEL_DS /* ss */
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET ss,0*/
pushq %rax /* rsp */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rsp,0
pushq $(1<<9) /* eflags - interrupts on */
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET rflags,0*/
pushq $__KERNEL_CS /* cs */
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET cs,0*/
pushq \child_rip /* rip */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rip,0
pushq %rax /* orig rax */
CFI_ADJUST_CFA_OFFSET 8
.endm
.macro UNFAKE_STACK_FRAME
addq $8*6, %rsp
CFI_ADJUST_CFA_OFFSET -(6*8)
.endm
.macro CFI_DEFAULT_STACK start=1
.if \start
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8
.else
CFI_DEF_CFA_OFFSET SS+8
.endif
CFI_REL_OFFSET r15,R15
CFI_REL_OFFSET r14,R14
CFI_REL_OFFSET r13,R13
CFI_REL_OFFSET r12,R12
CFI_REL_OFFSET rbp,RBP
CFI_REL_OFFSET rbx,RBX
CFI_REL_OFFSET r11,R11
CFI_REL_OFFSET r10,R10
CFI_REL_OFFSET r9,R9
CFI_REL_OFFSET r8,R8
CFI_REL_OFFSET rax,RAX
CFI_REL_OFFSET rcx,RCX
CFI_REL_OFFSET rdx,RDX
CFI_REL_OFFSET rsi,RSI
CFI_REL_OFFSET rdi,RDI
CFI_REL_OFFSET rip,RIP
/*CFI_REL_OFFSET cs,CS*/
/*CFI_REL_OFFSET rflags,EFLAGS*/
CFI_REL_OFFSET rsp,RSP
/*CFI_REL_OFFSET ss,SS*/
.endm
/*
* A newly forked process directly context switches into this.
*/
/* rdi: prev */
ENTRY(ret_from_fork)
CFI_DEFAULT_STACK
push kernel_eflags(%rip)
CFI_ADJUST_CFA_OFFSET 8
popf # reset kernel eflags
CFI_ADJUST_CFA_OFFSET -8
call schedule_tail
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
jnz rff_trace
rff_action:
RESTORE_REST
testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
je int_ret_from_sys_call
testl $_TIF_IA32,TI_flags(%rcx)
jnz int_ret_from_sys_call
RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
jmp ret_from_sys_call
rff_trace:
movq %rsp,%rdi
call syscall_trace_leave
GET_THREAD_INFO(%rcx)
jmp rff_action
CFI_ENDPROC
END(ret_from_fork)
/*
* System call entry. Up to 6 arguments in registers are supported.
*
* SYSCALL does not save anything on the stack and does not change the
* stack pointer.
*/
/*
* Register setup:
* rax system call number
* rdi arg0
* rcx return address for syscall/sysret, C arg3
* rsi arg1
* rdx arg2
* r10 arg3 (--> moved to rcx for C)
* r8 arg4
* r9 arg5
* r11 eflags for syscall/sysret, temporary for C
* r12-r15,rbp,rbx saved by C code, not touched.
*
* Interrupts are off on entry.
* Only called from user space.
*
* XXX if we had a free scratch register we could save the RSP into the stack frame
* and report it properly in ps. Unfortunately we don't have one.
*
* When the user can change the frame, always force IRET. That is because
* it deals with non-canonical addresses better. SYSRET has trouble
* with them due to bugs in both AMD and Intel CPUs.
*/
ENTRY(system_call)
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,PDA_STACKOFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
/*
* A hypervisor implementation might want to use a label
* after the swapgs, so that it can do the swapgs
* for the guest and jump here on syscall.
*/
ENTRY(system_call_after_swapgs)
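/*
 * Save the user stack pointer in the PDA and switch to this CPU's kernel
 * stack; pda_oldrsp is later used by FIXUP_TOP_OF_STACK and by the sysret
 * fast path to recover the user %rsp.
 */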
movq %rsp,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
/*
* No need to follow this irqs off/on section - it's straight
* and short:
*/
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8,1
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
jnz tracesys
cmpq $__NR_syscall_max,%rax
ja badsys
movq %r10,%rcx
call *sys_call_table(,%rax,8) # XXX: rip relative
movq %rax,RAX-ARGOFFSET(%rsp)
/*
* Syscall return path ending with SYSRET (fast path)
* Has incomplete stack frame and undefined top of stack.
*/
ret_from_sys_call:
movl $_TIF_ALLWORK_MASK,%edi
/* edi: flagmask */
sysret_check:
LOCKDEP_SYS_EXIT
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
movl TI_flags(%rcx),%edx
andl %edi,%edx
jnz sysret_careful
CFI_REMEMBER_STATE
/*
* sysretq will re-enable interrupts:
*/
TRACE_IRQS_ON
movq RIP-ARGOFFSET(%rsp),%rcx
CFI_REGISTER rip,rcx
RESTORE_ARGS 0,-ARG_SKIP,1
/*CFI_REGISTER rflags,r11*/
movq %gs:pda_oldrsp, %rsp
USERGS_SYSRET64
CFI_RESTORE_STATE
/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
jmp sysret_check
/* Handle a signal */
sysret_signal:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
/* edx: work flags (arg3) */
leaq do_notify_resume(%rip),%rax
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
xorl %esi,%esi # oldset -> arg2
call ptregscall_common
movl $_TIF_WORK_MASK,%edi
/* Use IRET because user could have changed frame. This
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
jmp ret_from_sys_call
/* Do syscall tracing */
tracesys:
SAVE_REST
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
FIXUP_TOP_OF_STACK %rdi
movq %rsp,%rdi
call syscall_trace_enter
/*
* Reload arg registers from stack in case ptrace changed them.
* We don't reload %rax because syscall_trace_enter() returned
* the value it wants us to use in the table lookup.
*/
LOAD_ARGS ARGOFFSET, 1
RESTORE_REST
cmpq $__NR_syscall_max,%rax
ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
movq %r10,%rcx /* fixup for C */
call *sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
/* Use IRET because user could have changed frame */
/*
* Syscall return path ending with IRET.
* Has correct top of stack, but partial stack frame.
*/
.globl int_ret_from_sys_call
int_ret_from_sys_call:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $3,CS-ARGOFFSET(%rsp)
je retint_restore_args
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
int_with_check:
LOCKDEP_SYS_EXIT_IRQ
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%edx
andl %edi,%edx
jnz int_careful
andl $~TS_COMPAT,TI_status(%rcx)
jmp retint_swapgs
/* Either reschedule or signal or syscall exit tracking needed. */
/* First do a reschedule test. */
/* edx: work, edi: workmask */
int_careful:
bt $TIF_NEED_RESCHED,%edx
jnc int_very_careful
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
/* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
jz int_signal
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace_leave
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
jmp int_restore_rest
int_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz 1f
movq %rsp,%rdi # &ptregs -> arg1
xorl %esi,%esi # oldset -> arg2
call do_notify_resume
1: movl $_TIF_WORK_MASK,%edi
int_restore_rest:
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
CFI_ENDPROC
END(system_call)
/*
* Certain special system calls need to save a full stack frame.
*/
.macro PTREGSCALL label,func,arg
.globl \label
\label:
leaq \func(%rip),%rax
leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
jmp ptregscall_common
END(\label)
.endm
CFI_STARTPROC
PTREGSCALL stub_clone, sys_clone, %r8
PTREGSCALL stub_fork, sys_fork, %rdi
PTREGSCALL stub_vfork, sys_vfork, %rdi
PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
PTREGSCALL stub_iopl, sys_iopl, %rsi
ENTRY(ptregscall_common)
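/*
 * We got here via a call through sys_call_table, so the top of the stack
 * holds a return address into the syscall fast path. Pop it into %r11
 * (and park it in %r15 across the C call; the caller's %r15 is saved by
 * SAVE_REST and restored by RESTORE_REST) so the stack is back at the
 * layout SAVE_REST and FIXUP_TOP_OF_STACK expect, then push it back
 * before the final ret.
 */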
popq %r11
CFI_ADJUST_CFA_OFFSET -8
CFI_REGISTER rip, r11
SAVE_REST
movq %r11, %r15
CFI_REGISTER rip, r15
FIXUP_TOP_OF_STACK %r11
call *%rax
RESTORE_TOP_OF_STACK %r11
movq %r15, %r11
CFI_REGISTER rip, r11
RESTORE_REST
pushq %r11
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rip, 0
ret
CFI_ENDPROC
END(ptregscall_common)
ENTRY(stub_execve)
CFI_STARTPROC
popq %r11
CFI_ADJUST_CFA_OFFSET -8
CFI_REGISTER rip, r11
SAVE_REST
FIXUP_TOP_OF_STACK %r11
movq %rsp, %rcx
call sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_REST
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_execve)
/*
* sigreturn is special because it needs to restore all registers on return.
* This cannot be done with SYSRET, so use the IRET return path instead.
*/
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
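/*
 * Discard the return address pushed by the call through sys_call_table;
 * sigreturn does not return through it, since the whole register frame is
 * rebuilt from the signal frame instead.
 */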
addq $8, %rsp
CFI_ADJUST_CFA_OFFSET -8
SAVE_REST
movq %rsp,%rdi
FIXUP_TOP_OF_STACK %r11
call sys_rt_sigreturn
movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
RESTORE_REST
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_rt_sigreturn)
/*
* initial frame state for interrupts and exceptions
*/
.macro _frame ref
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-\ref
/*CFI_REL_OFFSET ss,SS-\ref*/
CFI_REL_OFFSET rsp,RSP-\ref
/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
/*CFI_REL_OFFSET cs,CS-\ref*/
CFI_REL_OFFSET rip,RIP-\ref
.endm
/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX
/*
* Interrupt entry/exit.
*
* Interrupt entry points save only callee-clobbered registers in the fast path.
*
* Entry runs with interrupts off.
*/
/* 0(%rsp): interrupt number */
.macro interrupt func
cld
SAVE_ARGS
leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
pushq %rbp
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rbp, 0
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
testl $3,CS(%rdi)
je 1f
SWAPGS
/* irqcount is used to check if a CPU is already on an interrupt
stack or not. While this is essentially redundant with preempt_count
it is a little cheaper to use a separate counter in the PDA
(short of moving irq_enter into assembly, which would be too
much work) */
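/* pda_irqcount starts at -1, so the incl below sets ZF only for the
 outermost interrupt; the cmoveq then switches %rsp to the per-CPU irq
 stack just once, and nested interrupts stay on the stack already in use. */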
1: incl %gs:pda_irqcount
cmoveq %gs:pda_irqstackptr,%rsp
push %rbp # backlink for old unwinder
/*
* We entered an interrupt context - irqs are off:
*/
TRACE_IRQS_OFF
call \func
.endm
ENTRY(common_interrupt)
XCPT_FRAME
interrupt do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
decl %gs:pda_irqcount
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
exit_intr:
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
je retint_kernel
/* Interrupt came from user space */
/*
* Has a correct top of stack, but a partial stack frame
* %rcx: thread info. Interrupts off.
*/
retint_with_reschedule:
movl $_TIF_WORK_MASK,%edi
retint_check:
LOCKDEP_SYS_EXIT_IRQ
movl TI_flags(%rcx),%edx
andl %edi,%edx
CFI_REMEMBER_STATE
jnz retint_careful
retint_swapgs: /* return to user-space */
/*
* The iretq could re-enable interrupts:
*/
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_IRETQ
SWAPGS
jmp restore_args
retint_restore_args: /* return to kernel space */
DISABLE_INTERRUPTS(CLBR_ANY)
/*
* The iretq could re-enable interrupts:
*/
TRACE_IRQS_IRETQ
restore_args:
RESTORE_ARGS 0,8,0
irq_return:
INTERRUPT_RETURN
.section __ex_table, "a"
.quad irq_return, bad_iret
.previous
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
iretq
.section __ex_table,"a"
.quad native_iret, bad_iret
.previous
#endif
.section .fixup,"ax"
bad_iret:
/*
* The iret traps when the %cs or %ss being restored is bogus.
* We've lost the original trap vector and error code.
* #GPF is the most likely one to get for an invalid selector.
* So pretend we completed the iret and took the #GPF in user mode.
*
* We are now running with the kernel GS after exception recovery.
* But error_entry expects us to have user GS to match the user %cs,
* so swap back.
*/
pushq $0
SWAPGS
jmp general_protection
.previous
/* edi: workmask, edx: work */
retint_careful:
CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc retint_signal
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp retint_check
retint_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz retint_swapgs
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
movq $-1,ORIG_RAX(%rsp)
xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
jmp retint_with_reschedule
#ifdef CONFIG_PREEMPT
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
ENTRY(retint_kernel)
cmpl $0,TI_preempt_count(%rcx)
jnz retint_restore_args
bt $TIF_NEED_RESCHED,TI_flags(%rcx)
jnc retint_restore_args
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
jnc retint_restore_args
call preempt_schedule_irq
jmp exit_intr
#endif
CFI_ENDPROC
END(common_interrupt)
/*
* APIC interrupts.
*/
.macro apicinterrupt num,func
INTR_FRAME
pushq $~(\num)
CFI_ADJUST_CFA_OFFSET 8
interrupt \func
jmp ret_from_intr
CFI_ENDPROC
.endm
ENTRY(thermal_interrupt)
apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)
ENTRY(threshold_interrupt)
apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)
#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)
.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
.endm
INVALIDATE_ENTRY 0
INVALIDATE_ENTRY 1
INVALIDATE_ENTRY 2
INVALIDATE_ENTRY 3
INVALIDATE_ENTRY 4
INVALIDATE_ENTRY 5
INVALIDATE_ENTRY 6
INVALIDATE_ENTRY 7
ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
ENTRY(call_function_single_interrupt)
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
END(call_function_single_interrupt)
ENTRY(irq_move_cleanup_interrupt)
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
#endif
ENTRY(apic_timer_interrupt)
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)
ENTRY(uv_bau_message_intr1)
apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)
ENTRY(error_interrupt)
apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)
ENTRY(spurious_interrupt)
apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
/*
* Exception entry points.
*/
.macro zeroentry sym
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0 /* push error code/oldrax */
CFI_ADJUST_CFA_OFFSET 8
pushq %rax /* push real oldrax to the rdi slot */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rax,0
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
.endm
.macro errorentry sym
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq %rax
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rax,0
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
.endm
/* error code is on the stack already */
/* handle NMI-like exceptions that can happen everywhere */
.macro paranoidentry sym, ist=0, irqtrace=1
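/*
 * %ebx is used as a "no swapgs needed on exit" flag: it stays 1 if we
 * entered with the kernel GS base already loaded, and is cleared to 0
 * if we had to swapgs here. The sign of the upper half of MSR_GS_BASE
 * tells us which GS is live, which works even if this exception nested
 * inside another kernel entry.
 */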
SAVE_ALL
cld
movl $1,%ebx
movl $MSR_GS_BASE,%ecx
rdmsr
testl %edx,%edx
js 1f
SWAPGS
xorl %ebx,%ebx
1:
.if \ist
movq %gs:pda_data_offset, %rbp
.endif
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi
movq $-1,ORIG_RAX(%rsp)
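/*
 * For handlers that run on an IST stack, move the TSS IST pointer down
 * by EXCEPTION_STKSZ for the duration of the call, so that a nested
 * exception using the same IST slot gets a fresh area instead of
 * clobbering this frame.
 */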
.if \ist
subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
call \sym
.if \ist
addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
DISABLE_INTERRUPTS(CLBR_NONE)
.if \irqtrace
TRACE_IRQS_OFF
.endif
.endm
/*
* "Paranoid" exit path from exception stack.
* Paranoid because this is used by NMIs and cannot take
* any kernel state for granted.
* We don't do kernel preemption checks here, because only
* NMI should be common and it does not enable IRQs and
* cannot get reschedule ticks.
*
* "trace" is 0 for the NMI handler only, because irq-tracing
* is fundamentally NMI-unsafe. (we cannot change the soft and
* hard flags at once, atomically)
*/
.macro paranoidexit trace=1
/* ebx: no swapgs flag */
paranoid_exit\trace:
testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore\trace
testl $3,CS(%rsp)
jnz paranoid_userspace\trace
paranoid_swapgs\trace:
.if \trace
TRACE_IRQS_IRETQ 0
.endif
SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
RESTORE_ALL 8
jmp irq_return
paranoid_userspace\trace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
jz paranoid_swapgs\trace
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */
testl $_TIF_NEED_RESCHED,%ebx
jnz paranoid_schedule\trace
movl %ebx,%edx /* arg3: thread flags */
.if \trace
TRACE_IRQS_ON
.endif
ENABLE_INTERRUPTS(CLBR_NONE)
xorl %esi,%esi /* arg2: oldset */
movq %rsp,%rdi /* arg1: &pt_regs */
call do_notify_resume
DISABLE_INTERRUPTS(CLBR_NONE)
.if \trace
TRACE_IRQS_OFF
.endif
jmp paranoid_userspace\trace
paranoid_schedule\trace:
.if \trace
TRACE_IRQS_ON
.endif
ENABLE_INTERRUPTS(CLBR_ANY)
call schedule
DISABLE_INTERRUPTS(CLBR_ANY)
.if \trace
TRACE_IRQS_OFF
.endif
jmp paranoid_userspace\trace
CFI_ENDPROC
.endm
/*
* Exception entry point. This expects an error code/orig_rax on the stack
* and the exception handler in %rax.
*/
KPROBE_ENTRY(error_entry)
_frame RDI
CFI_REL_OFFSET rax,0
/* rdi slot contains rax, oldrax contains error code */
cld
subq $14*8,%rsp
CFI_ADJUST_CFA_OFFSET (14*8)
movq %rsi,13*8(%rsp)
CFI_REL_OFFSET rsi,RSI
movq 14*8(%rsp),%rsi /* load rax from rdi slot */
CFI_REGISTER rax,rsi
movq %rdx,12*8(%rsp)
CFI_REL_OFFSET rdx,RDX
movq %rcx,11*8(%rsp)
CFI_REL_OFFSET rcx,RCX
movq %rsi,10*8(%rsp) /* store rax */
CFI_REL_OFFSET rax,RAX
movq %r8, 9*8(%rsp)
CFI_REL_OFFSET r8,R8
movq %r9, 8*8(%rsp)
CFI_REL_OFFSET r9,R9
movq %r10,7*8(%rsp)
CFI_REL_OFFSET r10,R10
movq %r11,6*8(%rsp)
CFI_REL_OFFSET r11,R11
movq %rbx,5*8(%rsp)
CFI_REL_OFFSET rbx,RBX
movq %rbp,4*8(%rsp)
CFI_REL_OFFSET rbp,RBP
movq %r12,3*8(%rsp)
CFI_REL_OFFSET r12,R12
movq %r13,2*8(%rsp)
CFI_REL_OFFSET r13,R13
movq %r14,1*8(%rsp)
CFI_REL_OFFSET r14,R14
movq %r15,(%rsp)
CFI_REL_OFFSET r15,R15
xorl %ebx,%ebx
testl $3,CS(%rsp)
je error_kernelspace
error_swapgs:
SWAPGS
error_sti:
movq %rdi,RDI(%rsp)
CFI_REL_OFFSET rdi,RDI
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp)
call *%rax
/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
movl %ebx,%eax
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
testl %eax,%eax
jne retint_kernel
LOCKDEP_SYS_EXIT_IRQ
movl TI_flags(%rcx),%edx
movl $_TIF_WORK_MASK,%edi
andl %edi,%edx
jnz retint_careful
jmp retint_swapgs
CFI_ENDPROC
error_kernelspace:
incl %ebx
/* There are two places in the kernel that can potentially fault with
usergs. Handle them here. The exception handlers after
iret run with kernel gs again, so don't set the user space flag.
B stepping K8s sometimes report a truncated RIP for IRET
exceptions returning to compat mode. Check for these here too. */
leaq irq_return(%rip),%rcx
cmpq %rcx,RIP(%rsp)
je error_swapgs
movl %ecx,%ecx /* zero extend */
cmpq %rcx,RIP(%rsp)
je error_swapgs
cmpq $gs_change,RIP(%rsp)
je error_swapgs
jmp error_sti
KPROBE_END(error_entry)
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(native_load_gs_index)
CFI_STARTPROC
pushf
CFI_ADJUST_CFA_OFFSET 8
DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
SWAPGS
gs_change:
movl %edi,%gs
2: mfence /* workaround */
SWAPGS
popf
CFI_ADJUST_CFA_OFFSET -8
ret
CFI_ENDPROC
ENDPROC(native_load_gs_index)
.section __ex_table,"a"
.align 8
.quad gs_change,bad_gs
.previous
.section .fixup,"ax"
/* running with kernelgs */
bad_gs:
SWAPGS /* switch back to user gs */
xorl %eax,%eax
movl %eax,%gs
jmp 2b
.previous
/*
* Create a kernel thread.
*
* C extern interface:
* extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
*
* asm input arguments:
* rdi: fn, rsi: arg, rdx: flags
*/
ENTRY(kernel_thread)
CFI_STARTPROC
FAKE_STACK_FRAME $child_rip
SAVE_ALL
# rdi: flags, rsi: usp, rdx: will be &pt_regs
movq %rdx,%rdi
orq kernel_thread_flags(%rip),%rdi
movq $-1, %rsi
movq %rsp, %rdx
xorl %r8d,%r8d
xorl %r9d,%r9d
# clone now
call do_fork
movq %rax,RAX(%rsp)
xorl %edi,%edi
/*
* It isn't worth checking for a reschedule here,
* so internally to the x86_64 port you can rely on kernel_thread()
* not to reschedule the child before returning; this avoids the need
* for hacks, for example to fork off the per-CPU idle tasks.
* [Hopefully no generic code relies on the reschedule -AK]
*/
RESTORE_ALL
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
ENDPROC(kernel_thread)
child_rip:
pushq $0 # fake return address
CFI_STARTPROC
/*
* Here we are in the child and the registers are set as they were
* at kernel_thread() invocation in the parent.
*/
movq %rdi, %rax
movq %rsi, %rdi
call *%rax
# exit
mov %eax, %edi
call do_exit
CFI_ENDPROC
ENDPROC(child_rip)
/*
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
*
* C extern interface:
* extern long execve(char *name, char **argv, char **envp)
*
* asm input arguments:
* rdi: name, rsi: argv, rdx: envp
*
* We want to fall back into:
* extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs *regs)
*
* do_sys_execve asm fallback arguments:
* rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
*/
ENTRY(kernel_execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
SAVE_ALL
movq %rsp,%rcx
call sys_execve
movq %rax, RAX(%rsp)
RESTORE_REST
testq %rax,%rax
je int_ret_from_sys_call
RESTORE_ARGS
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
ENDPROC(kernel_execve)
KPROBE_ENTRY(page_fault)
errorentry do_page_fault
KPROBE_END(page_fault)
ENTRY(coprocessor_error)
zeroentry do_coprocessor_error
END(coprocessor_error)
ENTRY(simd_coprocessor_error)
zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)
ENTRY(device_not_available)
zeroentry math_state_restore
END(device_not_available)
/* runs on exception stack */
KPROBE_ENTRY(debug)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug, DEBUG_STACK
paranoidexit
KPROBE_END(debug)
/* runs on exception stack */
KPROBE_ENTRY(nmi)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $-1
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
paranoidexit 0
#else
jmp paranoid_exit1
CFI_ENDPROC
#endif
KPROBE_END(nmi)
KPROBE_ENTRY(int3)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_int3, DEBUG_STACK
jmp paranoid_exit1
CFI_ENDPROC
KPROBE_END(int3)
ENTRY(overflow)
zeroentry do_overflow
END(overflow)
ENTRY(bounds)
zeroentry do_bounds
END(bounds)
ENTRY(invalid_op)
zeroentry do_invalid_op
END(invalid_op)
ENTRY(coprocessor_segment_overrun)
zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)
/* runs on exception stack */
ENTRY(double_fault)
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
paranoidentry do_double_fault
jmp paranoid_exit1
CFI_ENDPROC
END(double_fault)
ENTRY(invalid_TSS)
errorentry do_invalid_TSS
END(invalid_TSS)
ENTRY(segment_not_present)
errorentry do_segment_not_present
END(segment_not_present)
/* runs on exception stack */
ENTRY(stack_segment)
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
paranoidentry do_stack_segment
jmp paranoid_exit1
CFI_ENDPROC
END(stack_segment)
KPROBE_ENTRY(general_protection)
errorentry do_general_protection
KPROBE_END(general_protection)
ENTRY(alignment_check)
errorentry do_alignment_check
END(alignment_check)
ENTRY(divide_error)
zeroentry do_divide_error
END(divide_error)
ENTRY(spurious_interrupt_bug)
zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)
#ifdef CONFIG_X86_MCE
/* runs on exception stack */
ENTRY(machine_check)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_machine_check
jmp paranoid_exit1
CFI_ENDPROC
END(machine_check)
#endif
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
CFI_STARTPROC
push %rbp
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rbp,0
mov %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
incl %gs:pda_irqcount
cmove %gs:pda_irqstackptr,%rsp
push %rbp # backlink for old unwinder
call __do_softirq
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
decl %gs:pda_irqcount
ret
CFI_ENDPROC
ENDPROC(call_softirq)
KPROBE_ENTRY(ignore_sysret)
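/*
 * Presumably installed as the SYSCALL (CSTAR) target when IA32 emulation
 * is not configured: 32-bit system calls simply fail with -ENOSYS.
 */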
CFI_STARTPROC
mov $-ENOSYS,%eax
sysret
CFI_ENDPROC
ENDPROC(ignore_sysret)
#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
zeroentry xen_do_hypervisor_callback
END(xen_hypervisor_callback)
/*
# A note on the "critical region" in our callback handler.
# We want to avoid stacking callback handlers due to events occurring
# during handling of the last event. To do this, we keep events disabled
# until we've done all processing. HOWEVER, we must enable events before
# popping the stack frame (can't be done atomically) and so it would still
# be possible to get enough handler activations to overflow the stack.
# Although unlikely, bugs of that kind are hard to track down, so we'd
# like to avoid the possibility.
# So, on entry to the handler we detect whether we interrupted an
# existing activation in its critical region -- if so, we pop the current
# activation and restart the handler using the previous one.
*/
ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
CFI_STARTPROC
/* Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
see the correct pointer to the pt_regs */
movq %rdi, %rsp # we don't return, adjust the stack frame
CFI_ENDPROC
CFI_DEFAULT_STACK
11: incl %gs:pda_irqcount
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
cmovzq %gs:pda_irqstackptr,%rsp
pushq %rbp # backlink for old unwinder
call xen_evtchn_do_upcall
popq %rsp
CFI_DEF_CFA_REGISTER rsp
decl %gs:pda_irqcount
jmp error_exit
CFI_ENDPROC
END(xen_do_hypervisor_callback)
/*
# Hypervisor uses this for application faults while it executes.
# We get here for two reasons:
# 1. Fault while reloading DS, ES, FS or GS
# 2. Fault while executing IRET
# Category 1 we do not need to fix up as Xen has already reloaded all segment
# registers that could be reloaded and zeroed the others.
# Category 2 we fix up by killing the current process. We cannot use the
# normal Linux return path in this case because if we use the IRET hypercall
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
# We distinguish between categories by comparing each saved segment register
# with its current contents: any discrepancy means we are in category 1.
*/
ENTRY(xen_failsafe_callback)
framesz = (RIP-0x30) /* workaround buggy gas */
_frame framesz
CFI_REL_OFFSET rcx, 0
CFI_REL_OFFSET r11, 8
movw %ds,%cx
cmpw %cx,0x10(%rsp)
CFI_REMEMBER_STATE
jne 1f
movw %es,%cx
cmpw %cx,0x18(%rsp)
jne 1f
movw %fs,%cx
cmpw %cx,0x20(%rsp)
jne 1f
movw %gs,%cx
cmpw %cx,0x28(%rsp)
jne 1f
/* All segments match their saved values => Category 2 (Bad IRET). */
movq (%rsp),%rcx
CFI_RESTORE rcx
movq 8(%rsp),%r11
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
pushq $0
CFI_ADJUST_CFA_OFFSET 8
pushq %r11
CFI_ADJUST_CFA_OFFSET 8
pushq %rcx
CFI_ADJUST_CFA_OFFSET 8
jmp general_protection
CFI_RESTORE_STATE
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
movq (%rsp),%rcx
CFI_RESTORE rcx
movq 8(%rsp),%r11
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
pushq $0
CFI_ADJUST_CFA_OFFSET 8
SAVE_ALL
jmp error_exit
CFI_ENDPROC
END(xen_failsafe_callback)
#endif /* CONFIG_XEN */