entry_64.S

  1. /*
  2. * linux/arch/x86_64/entry.S
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
  6. * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
  7. */
  8. /*
  9. * entry.S contains the system-call and fault low-level handling routines.
  10. *
  11. * NOTE: This code handles signal recognition, which happens every time
  12. * after an interrupt and after each system call.
  13. *
  14. * Normal syscalls and interrupts don't save a full stack frame; this is
  15. * only done for syscall tracing, signals or fork/exec et al.
  16. *
  17. * A note on terminology:
  18. * - top of stack: Architecture-defined interrupt frame from SS to RIP
  19. * at the top of the kernel process stack (layout sketched after this comment).
  20. * - partial stack frame: partially saved registers, up to R11.
  21. * - full stack frame: like the partial stack frame, but with all registers saved.
  22. *
  23. * Some macro usage:
  24. * - CFI macros are used to generate dwarf2 unwind information for better
  25. * backtraces. They don't change any code.
  26. * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
  27. * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
  28. * There are unfortunately lots of special cases where some registers are
  29. * not touched. The macro is a big mess that should be cleaned up.
  30. * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
  31. * Gives a full stack frame.
  32. * - ENTRY/END - Define functions in the symbol table.
  33. * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  34. * frame that is otherwise undefined after a SYSCALL
  35. * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
  36. * - errorentry/paranoidentry/zeroentry - Define exception entry points.
  37. */
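/*
 * A rough sketch of the frames described above, from the top of the
 * kernel stack downwards (slot names are the asm-offsets symbols this
 * file uses):
 *
 *   SS, RSP, EFLAGS, CS, RIP       architecture-defined iret frame
 *   ORIG_RAX                       error code / syscall nr / -1
 *   RDI ... R11                    partial stack frame (SAVE_ARGS)
 *   RBX, RBP, R12 ... R15          rest of the full frame (SAVE_REST)
 */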
  38. #include <linux/linkage.h>
  39. #include <asm/segment.h>
  40. #include <asm/cache.h>
  41. #include <asm/errno.h>
  42. #include <asm/dwarf2.h>
  43. #include <asm/calling.h>
  44. #include <asm/asm-offsets.h>
  45. #include <asm/msr.h>
  46. #include <asm/unistd.h>
  47. #include <asm/thread_info.h>
  48. #include <asm/hw_irq.h>
  49. #include <asm/page.h>
  50. #include <asm/irqflags.h>
  51. #include <asm/paravirt.h>
  52. #include <asm/ftrace.h>
  53. /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
  54. #include <linux/elf-em.h>
  55. #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
  56. #define __AUDIT_ARCH_64BIT 0x80000000
  57. #define __AUDIT_ARCH_LE 0x40000000
  58. .code64
  59. #ifdef CONFIG_FUNCTION_TRACER
  60. #ifdef CONFIG_DYNAMIC_FTRACE
  61. ENTRY(mcount)
  62. retq
  63. END(mcount)
  64. ENTRY(ftrace_caller)
  65. /* taken from glibc */
  66. subq $0x38, %rsp
  67. movq %rax, (%rsp)
  68. movq %rcx, 8(%rsp)
  69. movq %rdx, 16(%rsp)
  70. movq %rsi, 24(%rsp)
  71. movq %rdi, 32(%rsp)
  72. movq %r8, 40(%rsp)
  73. movq %r9, 48(%rsp)
  74. movq 0x38(%rsp), %rdi
  75. movq 8(%rbp), %rsi
  76. subq $MCOUNT_INSN_SIZE, %rdi
  77. .globl ftrace_call
  78. ftrace_call:
  79. call ftrace_stub
  80. movq 48(%rsp), %r9
  81. movq 40(%rsp), %r8
  82. movq 32(%rsp), %rdi
  83. movq 24(%rsp), %rsi
  84. movq 16(%rsp), %rdx
  85. movq 8(%rsp), %rcx
  86. movq (%rsp), %rax
  87. addq $0x38, %rsp
  88. .globl ftrace_stub
  89. ftrace_stub:
  90. retq
  91. END(ftrace_caller)
  92. #else /* ! CONFIG_DYNAMIC_FTRACE */
  93. ENTRY(mcount)
  94. cmpq $ftrace_stub, ftrace_trace_function
  95. jnz trace
  96. .globl ftrace_stub
  97. ftrace_stub:
  98. retq
  99. trace:
  100. /* taken from glibc */
  101. subq $0x38, %rsp
  102. movq %rax, (%rsp)
  103. movq %rcx, 8(%rsp)
  104. movq %rdx, 16(%rsp)
  105. movq %rsi, 24(%rsp)
  106. movq %rdi, 32(%rsp)
  107. movq %r8, 40(%rsp)
  108. movq %r9, 48(%rsp)
  109. movq 0x38(%rsp), %rdi
  110. movq 8(%rbp), %rsi
  111. subq $MCOUNT_INSN_SIZE, %rdi
  112. call *ftrace_trace_function
  113. movq 48(%rsp), %r9
  114. movq 40(%rsp), %r8
  115. movq 32(%rsp), %rdi
  116. movq 24(%rsp), %rsi
  117. movq 16(%rsp), %rdx
  118. movq 8(%rsp), %rcx
  119. movq (%rsp), %rax
  120. addq $0x38, %rsp
  121. jmp ftrace_stub
  122. END(mcount)
  123. #endif /* CONFIG_DYNAMIC_FTRACE */
  124. #endif /* CONFIG_FUNCTION_TRACER */
  125. #ifndef CONFIG_PREEMPT
  126. #define retint_kernel retint_restore_args
  127. #endif
  128. #ifdef CONFIG_PARAVIRT
  129. ENTRY(native_usergs_sysret64)
  130. swapgs
  131. sysretq
  132. #endif /* CONFIG_PARAVIRT */
  133. .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
  134. #ifdef CONFIG_TRACE_IRQFLAGS
  135. bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
  136. jnc 1f
  137. TRACE_IRQS_ON
  138. 1:
  139. #endif
  140. .endm
  141. /*
  142. * C code is not supposed to know about undefined top of stack. Every time
  143. * a C function with a pt_regs argument is called from the SYSCALL-based
  144. * fast path, FIXUP_TOP_OF_STACK is needed.
  145. * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
  146. * manipulation.
  147. */
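/*
 * A short usage sketch (see stub_execve further down for the real thing):
 *
 *   SAVE_REST
 *   FIXUP_TOP_OF_STACK %r11     # make the RSP/CS/SS/EFLAGS slots valid
 *   movq %rsp, %rcx             # pt_regs pointer for the C helper
 *   call sys_execve
 *   RESTORE_TOP_OF_STACK %r11   # sync the saved syscall state back
 *   RESTORE_REST
 */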
  148. /* %rsp:at FRAMEEND */
  149. .macro FIXUP_TOP_OF_STACK tmp offset=0
  150. movq %gs:pda_oldrsp,\tmp
  151. movq \tmp,RSP+\offset(%rsp)
  152. movq $__USER_DS,SS+\offset(%rsp)
  153. movq $__USER_CS,CS+\offset(%rsp)
  154. movq $-1,RCX+\offset(%rsp)
  155. movq R11+\offset(%rsp),\tmp /* get eflags */
  156. movq \tmp,EFLAGS+\offset(%rsp)
  157. .endm
  158. .macro RESTORE_TOP_OF_STACK tmp offset=0
  159. movq RSP+\offset(%rsp),\tmp
  160. movq \tmp,%gs:pda_oldrsp
  161. movq EFLAGS+\offset(%rsp),\tmp
  162. movq \tmp,R11+\offset(%rsp)
  163. .endm
  164. .macro FAKE_STACK_FRAME child_rip
  165. /* push in order ss, rsp, eflags, cs, rip */
  166. xorl %eax, %eax
  167. pushq $__KERNEL_DS /* ss */
  168. CFI_ADJUST_CFA_OFFSET 8
  169. /*CFI_REL_OFFSET ss,0*/
  170. pushq %rax /* rsp */
  171. CFI_ADJUST_CFA_OFFSET 8
  172. CFI_REL_OFFSET rsp,0
  173. pushq $X86_EFLAGS_IF /* eflags - interrupts on */
  174. CFI_ADJUST_CFA_OFFSET 8
  175. /*CFI_REL_OFFSET rflags,0*/
  176. pushq $__KERNEL_CS /* cs */
  177. CFI_ADJUST_CFA_OFFSET 8
  178. /*CFI_REL_OFFSET cs,0*/
  179. pushq \child_rip /* rip */
  180. CFI_ADJUST_CFA_OFFSET 8
  181. CFI_REL_OFFSET rip,0
  182. pushq %rax /* orig rax */
  183. CFI_ADJUST_CFA_OFFSET 8
  184. .endm
  185. .macro UNFAKE_STACK_FRAME
  186. addq $8*6, %rsp
  187. CFI_ADJUST_CFA_OFFSET -(6*8)
  188. .endm
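/*
 * Accounting note: FAKE_STACK_FRAME pushes six quadwords (ss, rsp, eflags,
 * cs, rip, orig_rax), which is why UNFAKE_STACK_FRAME pops exactly 6*8
 * bytes. kernel_thread below is the typical user:
 *
 *   FAKE_STACK_FRAME $child_rip   # fake iret frame "returning" to child_rip
 *   SAVE_ALL
 *   ...
 *   RESTORE_ALL
 *   UNFAKE_STACK_FRAME
 */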
  189. /*
  190. * initial frame state for interrupts (and exceptions without error code)
  191. */
  192. .macro EMPTY_FRAME start=1 offset=0
  193. .if \start
  194. CFI_STARTPROC simple
  195. CFI_SIGNAL_FRAME
  196. CFI_DEF_CFA rsp,8+\offset
  197. .else
  198. CFI_DEF_CFA_OFFSET 8+\offset
  199. .endif
  200. .endm
  201. /*
  202. * initial frame state for interrupts (and exceptions without error code)
  203. */
  204. .macro INTR_FRAME start=1 offset=0
  205. EMPTY_FRAME \start, SS+8+\offset-RIP
  206. /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
  207. CFI_REL_OFFSET rsp, RSP+\offset-RIP
  208. /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
  209. /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
  210. CFI_REL_OFFSET rip, RIP+\offset-RIP
  211. .endm
  212. /*
  213. * initial frame state for exceptions with error code (and interrupts
  214. * with vector already pushed)
  215. */
  216. .macro XCPT_FRAME start=1 offset=0
  217. INTR_FRAME \start, RIP+\offset-ORIG_RAX
  218. /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
  219. .endm
  220. /*
  221. * frame that enables calling into C.
  222. */
  223. .macro PARTIAL_FRAME start=1 offset=0
  224. XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
  225. CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
  226. CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
  227. CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
  228. CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
  229. CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
  230. CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
  231. CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
  232. CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
  233. CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
  234. .endm
  235. /*
  236. * frame that enables passing a complete pt_regs to a C function.
  237. */
  238. .macro DEFAULT_FRAME start=1 offset=0
  239. PARTIAL_FRAME \start, R11+\offset-R15
  240. CFI_REL_OFFSET rbx, RBX+\offset
  241. CFI_REL_OFFSET rbp, RBP+\offset
  242. CFI_REL_OFFSET r12, R12+\offset
  243. CFI_REL_OFFSET r13, R13+\offset
  244. CFI_REL_OFFSET r14, R14+\offset
  245. CFI_REL_OFFSET r15, R15+\offset
  246. .endm
  247. /* save partial stack frame */
  248. ENTRY(save_args)
  249. XCPT_FRAME
  250. cld
  251. movq_cfi rdi, RDI+16-ARGOFFSET
  252. movq_cfi rsi, RSI+16-ARGOFFSET
  253. movq_cfi rdx, RDX+16-ARGOFFSET
  254. movq_cfi rcx, RCX+16-ARGOFFSET
  255. movq_cfi rax, RAX+16-ARGOFFSET
  256. movq_cfi r8, R8+16-ARGOFFSET
  257. movq_cfi r9, R9+16-ARGOFFSET
  258. movq_cfi r10, R10+16-ARGOFFSET
  259. movq_cfi r11, R11+16-ARGOFFSET
  260. leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
  261. movq_cfi rbp, 8 /* push %rbp */
  262. leaq 8(%rsp), %rbp /* mov %rsp, %rbp */
  263. testl $3, CS(%rdi)
  264. je 1f
  265. SWAPGS
  266. /*
  267. * irqcount is used to check if a CPU is already on an interrupt stack
  268. * or not. While this is essentially redundant with preempt_count it is
  269. * a little cheaper to use a separate counter in the PDA (short of
  270. * moving irq_enter into assembly, which would be too much work)
  271. */
  272. 1: incl %gs:pda_irqcount
  273. jne 2f
  274. popq_cfi %rax /* move return address... */
  275. mov %gs:pda_irqstackptr,%rsp
  276. EMPTY_FRAME 0
  277. pushq_cfi %rax /* ... to the new stack */
  278. /*
  279. * We entered an interrupt context - irqs are off:
  280. */
  281. 2: TRACE_IRQS_OFF
  282. ret
  283. CFI_ENDPROC
  284. END(save_args)
  285. ENTRY(save_rest)
  286. PARTIAL_FRAME 1 REST_SKIP+8
  287. movq 5*8+16(%rsp), %r11 /* save return address */
  288. movq_cfi rbx, RBX+16
  289. movq_cfi rbp, RBP+16
  290. movq_cfi r12, R12+16
  291. movq_cfi r13, R13+16
  292. movq_cfi r14, R14+16
  293. movq_cfi r15, R15+16
  294. movq %r11, 8(%rsp) /* return address */
  295. FIXUP_TOP_OF_STACK %r11, 16
  296. ret
  297. CFI_ENDPROC
  298. END(save_rest)
  299. /* save complete stack frame */
  300. ENTRY(save_paranoid)
  301. XCPT_FRAME 1 RDI+8
  302. cld
  303. movq_cfi rdi, RDI+8
  304. movq_cfi rsi, RSI+8
  305. movq_cfi rdx, RDX+8
  306. movq_cfi rcx, RCX+8
  307. movq_cfi rax, RAX+8
  308. movq_cfi r8, R8+8
  309. movq_cfi r9, R9+8
  310. movq_cfi r10, R10+8
  311. movq_cfi r11, R11+8
  312. movq_cfi rbx, RBX+8
  313. movq_cfi rbp, RBP+8
  314. movq_cfi r12, R12+8
  315. movq_cfi r13, R13+8
  316. movq_cfi r14, R14+8
  317. movq_cfi r15, R15+8
  318. movl $1,%ebx
  319. movl $MSR_GS_BASE,%ecx
  320. rdmsr
  321. testl %edx,%edx
  322. js 1f /* negative -> in kernel */
  323. SWAPGS
  324. xorl %ebx,%ebx
  325. 1: ret
  326. CFI_ENDPROC
  327. END(save_paranoid)
  328. /*
  329. * A newly forked process directly context switches into this address.
  330. *
  331. * rdi: prev task we switched from
  332. */
  333. ENTRY(ret_from_fork)
  334. DEFAULT_FRAME
  335. push kernel_eflags(%rip)
  336. CFI_ADJUST_CFA_OFFSET 8
  337. popf # reset kernel eflags
  338. CFI_ADJUST_CFA_OFFSET -8
  339. call schedule_tail # rdi: 'prev' task parameter
  340. GET_THREAD_INFO(%rcx)
  341. CFI_REMEMBER_STATE
  342. RESTORE_REST
  343. testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
  344. je int_ret_from_sys_call
  345. testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
  346. jnz int_ret_from_sys_call
  347. RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
  348. jmp ret_from_sys_call # go to the SYSRET fastpath
  349. CFI_RESTORE_STATE
  350. CFI_ENDPROC
  351. END(ret_from_fork)
  352. /*
  353. * System call entry. Up to 6 arguments in registers are supported.
  354. *
  355. * SYSCALL does not save anything on the stack and does not change the
  356. * stack pointer.
  357. */
  358. /*
  359. * Register setup:
  360. * rax system call number
  361. * rdi arg0
  362. * rcx return address for syscall/sysret, C arg3
  363. * rsi arg1
  364. * rdx arg2
  365. * r10 arg3 (--> moved to rcx for C)
  366. * r8 arg4
  367. * r9 arg5
  368. * r11 eflags for syscall/sysret, temporary for C
  369. * r12-r15,rbp,rbx saved by C code, not touched.
  370. *
  371. * Interrupts are off on entry.
  372. * Only called from user space.
  373. *
  374. * XXX if we had a free scratch register we could save the RSP into the stack frame
  375. * and report it properly in ps. Unfortunately we don't have one.
  376. *
  377. * When the user can change the frames, always force IRET. That is because
  378. * IRET deals with non-canonical addresses better; SYSRET has trouble
  379. * with them due to bugs in both AMD and Intel CPUs.
  380. */
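/*
 * For reference only (not part of the kernel build): a user-space caller
 * matching the register setup above looks roughly like
 *
 *   movq $__NR_write, %rax      # syscall number
 *   movq $1, %rdi               # arg0: fd
 *   leaq buf(%rip), %rsi        # arg1: buffer (hypothetical symbol)
 *   movq $len, %rdx             # arg2: length (hypothetical symbol)
 *   syscall                     # rcx <- return RIP, r11 <- RFLAGS
 *
 * SYSCALL saves RIP in %rcx and RFLAGS in %r11 and leaves the stack
 * pointer alone, which is why the entry code below has to switch to
 * pda_kernelstack by hand and why FIXUP_TOP_OF_STACK exists at all.
 */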
  381. ENTRY(system_call)
  382. CFI_STARTPROC simple
  383. CFI_SIGNAL_FRAME
  384. CFI_DEF_CFA rsp,PDA_STACKOFFSET
  385. CFI_REGISTER rip,rcx
  386. /*CFI_REGISTER rflags,r11*/
  387. SWAPGS_UNSAFE_STACK
  388. /*
  389. * A hypervisor implementation might want to use a label
  390. * after the swapgs, so that it can do the swapgs
  391. * for the guest and jump here on syscall.
  392. */
  393. ENTRY(system_call_after_swapgs)
  394. movq %rsp,%gs:pda_oldrsp
  395. movq %gs:pda_kernelstack,%rsp
  396. /*
  397. * No need to follow this irqs off/on section - it's straight
  398. * and short:
  399. */
  400. ENABLE_INTERRUPTS(CLBR_NONE)
  401. SAVE_ARGS 8,1
  402. movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
  403. movq %rcx,RIP-ARGOFFSET(%rsp)
  404. CFI_REL_OFFSET rip,RIP-ARGOFFSET
  405. GET_THREAD_INFO(%rcx)
  406. testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
  407. jnz tracesys
  408. system_call_fastpath:
  409. cmpq $__NR_syscall_max,%rax
  410. ja badsys
  411. movq %r10,%rcx
  412. call *sys_call_table(,%rax,8) # XXX: rip relative
  413. movq %rax,RAX-ARGOFFSET(%rsp)
  414. /*
  415. * Syscall return path ending with SYSRET (fast path)
  416. * Has incomplete stack frame and undefined top of stack.
  417. */
  418. ret_from_sys_call:
  419. movl $_TIF_ALLWORK_MASK,%edi
  420. /* edi: flagmask */
  421. sysret_check:
  422. LOCKDEP_SYS_EXIT
  423. GET_THREAD_INFO(%rcx)
  424. DISABLE_INTERRUPTS(CLBR_NONE)
  425. TRACE_IRQS_OFF
  426. movl TI_flags(%rcx),%edx
  427. andl %edi,%edx
  428. jnz sysret_careful
  429. CFI_REMEMBER_STATE
  430. /*
  431. * sysretq will re-enable interrupts:
  432. */
  433. TRACE_IRQS_ON
  434. movq RIP-ARGOFFSET(%rsp),%rcx
  435. CFI_REGISTER rip,rcx
  436. RESTORE_ARGS 0,-ARG_SKIP,1
  437. /*CFI_REGISTER rflags,r11*/
  438. movq %gs:pda_oldrsp, %rsp
  439. USERGS_SYSRET64
  440. CFI_RESTORE_STATE
  441. /* Handle reschedules */
  442. /* edx: work, edi: workmask */
  443. sysret_careful:
  444. bt $TIF_NEED_RESCHED,%edx
  445. jnc sysret_signal
  446. TRACE_IRQS_ON
  447. ENABLE_INTERRUPTS(CLBR_NONE)
  448. pushq %rdi
  449. CFI_ADJUST_CFA_OFFSET 8
  450. call schedule
  451. popq %rdi
  452. CFI_ADJUST_CFA_OFFSET -8
  453. jmp sysret_check
  454. /* Handle a signal */
  455. sysret_signal:
  456. TRACE_IRQS_ON
  457. ENABLE_INTERRUPTS(CLBR_NONE)
  458. #ifdef CONFIG_AUDITSYSCALL
  459. bt $TIF_SYSCALL_AUDIT,%edx
  460. jc sysret_audit
  461. #endif
  462. /* edx: work flags (arg3) */
  463. leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
  464. xorl %esi,%esi # oldset -> arg2
  465. SAVE_REST
  466. FIXUP_TOP_OF_STACK %r11
  467. call do_notify_resume
  468. RESTORE_TOP_OF_STACK %r11
  469. RESTORE_REST
  470. movl $_TIF_WORK_MASK,%edi
  471. /* Use IRET because the user could have changed the frame. This
  472. works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
  473. DISABLE_INTERRUPTS(CLBR_NONE)
  474. TRACE_IRQS_OFF
  475. jmp int_with_check
  476. badsys:
  477. movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
  478. jmp ret_from_sys_call
  479. #ifdef CONFIG_AUDITSYSCALL
  480. /*
  481. * Fast path for syscall audit without full syscall trace.
  482. * We just call audit_syscall_entry() directly, and then
  483. * jump back to the normal fast path.
  484. */
  485. auditsys:
  486. movq %r10,%r9 /* 6th arg: 4th syscall arg */
  487. movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
  488. movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
  489. movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
  490. movq %rax,%rsi /* 2nd arg: syscall number */
  491. movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
  492. call audit_syscall_entry
  493. LOAD_ARGS 0 /* reload call-clobbered registers */
  494. jmp system_call_fastpath
  495. /*
  496. * Return fast path for syscall audit. Call audit_syscall_exit()
  497. * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
  498. * masked off.
  499. */
  500. sysret_audit:
  501. movq %rax,%rsi /* second arg, syscall return value */
  502. cmpq $0,%rax /* is it < 0? */
  503. setl %al /* 1 if so, 0 if not */
  504. movzbl %al,%edi /* zero-extend that into %edi */
  505. inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
  506. call audit_syscall_exit
  507. movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
  508. jmp sysret_check
  509. #endif /* CONFIG_AUDITSYSCALL */
  510. /* Do syscall tracing */
  511. tracesys:
  512. #ifdef CONFIG_AUDITSYSCALL
  513. testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
  514. jz auditsys
  515. #endif
  516. SAVE_REST
  517. movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
  518. FIXUP_TOP_OF_STACK %rdi
  519. movq %rsp,%rdi
  520. call syscall_trace_enter
  521. /*
  522. * Reload arg registers from stack in case ptrace changed them.
  523. * We don't reload %rax because syscall_trace_enter() returned
  524. * the value it wants us to use in the table lookup.
  525. */
  526. LOAD_ARGS ARGOFFSET, 1
  527. RESTORE_REST
  528. cmpq $__NR_syscall_max,%rax
  529. ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
  530. movq %r10,%rcx /* fixup for C */
  531. call *sys_call_table(,%rax,8)
  532. movq %rax,RAX-ARGOFFSET(%rsp)
  533. /* Use IRET because user could have changed frame */
  534. /*
  535. * Syscall return path ending with IRET.
  536. * Has correct top of stack, but partial stack frame.
  537. */
  538. .globl int_ret_from_sys_call
  539. .globl int_with_check
  540. int_ret_from_sys_call:
  541. DISABLE_INTERRUPTS(CLBR_NONE)
  542. TRACE_IRQS_OFF
  543. testl $3,CS-ARGOFFSET(%rsp)
  544. je retint_restore_args
  545. movl $_TIF_ALLWORK_MASK,%edi
  546. /* edi: mask to check */
  547. int_with_check:
  548. LOCKDEP_SYS_EXIT_IRQ
  549. GET_THREAD_INFO(%rcx)
  550. movl TI_flags(%rcx),%edx
  551. andl %edi,%edx
  552. jnz int_careful
  553. andl $~TS_COMPAT,TI_status(%rcx)
  554. jmp retint_swapgs
  555. /* Either reschedule or signal or syscall exit tracking needed. */
  556. /* First do a reschedule test. */
  557. /* edx: work, edi: workmask */
  558. int_careful:
  559. bt $TIF_NEED_RESCHED,%edx
  560. jnc int_very_careful
  561. TRACE_IRQS_ON
  562. ENABLE_INTERRUPTS(CLBR_NONE)
  563. pushq %rdi
  564. CFI_ADJUST_CFA_OFFSET 8
  565. call schedule
  566. popq %rdi
  567. CFI_ADJUST_CFA_OFFSET -8
  568. DISABLE_INTERRUPTS(CLBR_NONE)
  569. TRACE_IRQS_OFF
  570. jmp int_with_check
  571. /* handle signals and tracing -- both require a full stack frame */
  572. int_very_careful:
  573. TRACE_IRQS_ON
  574. ENABLE_INTERRUPTS(CLBR_NONE)
  575. SAVE_REST
  576. /* Check for syscall exit trace */
  577. testl $_TIF_WORK_SYSCALL_EXIT,%edx
  578. jz int_signal
  579. pushq %rdi
  580. CFI_ADJUST_CFA_OFFSET 8
  581. leaq 8(%rsp),%rdi # &ptregs -> arg1
  582. call syscall_trace_leave
  583. popq %rdi
  584. CFI_ADJUST_CFA_OFFSET -8
  585. andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
  586. jmp int_restore_rest
  587. int_signal:
  588. testl $_TIF_DO_NOTIFY_MASK,%edx
  589. jz 1f
  590. movq %rsp,%rdi # &ptregs -> arg1
  591. xorl %esi,%esi # oldset -> arg2
  592. call do_notify_resume
  593. 1: movl $_TIF_WORK_MASK,%edi
  594. int_restore_rest:
  595. RESTORE_REST
  596. DISABLE_INTERRUPTS(CLBR_NONE)
  597. TRACE_IRQS_OFF
  598. jmp int_with_check
  599. CFI_ENDPROC
  600. END(system_call)
  601. /*
  602. * Certain special system calls need to save a full stack frame.
  603. */
  604. .macro PTREGSCALL label,func,arg
  605. ENTRY(\label)
  606. PARTIAL_FRAME 1 8 /* offset 8: return address */
  607. subq $REST_SKIP, %rsp
  608. CFI_ADJUST_CFA_OFFSET REST_SKIP
  609. call save_rest
  610. DEFAULT_FRAME 0 8 /* offset 8: return address */
  611. leaq 8(%rsp), \arg /* pt_regs pointer */
  612. call \func
  613. jmp ptregscall_common
  614. CFI_ENDPROC
  615. END(\label)
  616. .endm
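/*
 * For example, "PTREGSCALL stub_clone, sys_clone, %r8" below expands to a
 * stub_clone that reserves REST_SKIP bytes, calls save_rest to complete
 * the frame, loads the pt_regs pointer into %r8 (the fifth C argument
 * register), calls sys_clone and finally jumps to ptregscall_common to
 * restore the extended registers.
 */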
  617. PTREGSCALL stub_clone, sys_clone, %r8
  618. PTREGSCALL stub_fork, sys_fork, %rdi
  619. PTREGSCALL stub_vfork, sys_vfork, %rdi
  620. PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
  621. PTREGSCALL stub_iopl, sys_iopl, %rsi
  622. ENTRY(ptregscall_common)
  623. DEFAULT_FRAME 1 8 /* offset 8: return address */
  624. RESTORE_TOP_OF_STACK %r11, 8
  625. movq_cfi_restore R15+8, r15
  626. movq_cfi_restore R14+8, r14
  627. movq_cfi_restore R13+8, r13
  628. movq_cfi_restore R12+8, r12
  629. movq_cfi_restore RBP+8, rbp
  630. movq_cfi_restore RBX+8, rbx
  631. ret $REST_SKIP /* pop extended registers */
  632. CFI_ENDPROC
  633. END(ptregscall_common)
  634. ENTRY(stub_execve)
  635. CFI_STARTPROC
  636. popq %r11
  637. CFI_ADJUST_CFA_OFFSET -8
  638. CFI_REGISTER rip, r11
  639. SAVE_REST
  640. FIXUP_TOP_OF_STACK %r11
  641. movq %rsp, %rcx
  642. call sys_execve
  643. RESTORE_TOP_OF_STACK %r11
  644. movq %rax,RAX(%rsp)
  645. RESTORE_REST
  646. jmp int_ret_from_sys_call
  647. CFI_ENDPROC
  648. END(stub_execve)
  649. /*
  650. * sigreturn is special because it needs to restore all registers on return.
  651. * This cannot be done with SYSRET, so use the IRET return path instead.
  652. */
  653. ENTRY(stub_rt_sigreturn)
  654. CFI_STARTPROC
  655. addq $8, %rsp
  656. CFI_ADJUST_CFA_OFFSET -8
  657. SAVE_REST
  658. movq %rsp,%rdi
  659. FIXUP_TOP_OF_STACK %r11
  660. call sys_rt_sigreturn
  661. movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
  662. RESTORE_REST
  663. jmp int_ret_from_sys_call
  664. CFI_ENDPROC
  665. END(stub_rt_sigreturn)
  666. /*
  667. * Build the entry stubs and pointer table with some assembler magic.
  668. * We pack 7 stubs into a single 32-byte chunk, which will fit in a
  669. * single cache line on all modern x86 implementations.
  670. */
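/*
 * Packing arithmetic, spelled out: in each 32-byte chunk, six stubs are a
 * 2-byte "pushq $imm8" plus a 2-byte short jump, the seventh stub is just
 * the push, and the shared "jmp common_interrupt" fills out the rest, so
 * (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 below is simply the number of
 * chunks, rounded up.
 */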
  671. .section .init.rodata,"a"
  672. ENTRY(interrupt)
  673. .text
  674. .p2align 5
  675. .p2align CONFIG_X86_L1_CACHE_SHIFT
  676. ENTRY(irq_entries_start)
  677. INTR_FRAME
  678. vector=FIRST_EXTERNAL_VECTOR
  679. .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
  680. .balign 32
  681. .rept 7
  682. .if vector < NR_VECTORS
  683. .if vector <> FIRST_EXTERNAL_VECTOR
  684. CFI_ADJUST_CFA_OFFSET -8
  685. .endif
  686. 1: pushq $(~vector+0x80) /* Note: always in signed byte range */
  687. CFI_ADJUST_CFA_OFFSET 8
  688. .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
  689. jmp 2f
  690. .endif
  691. .previous
  692. .quad 1b
  693. .text
  694. vector=vector+1
  695. .endif
  696. .endr
  697. 2: jmp common_interrupt
  698. .endr
  699. CFI_ENDPROC
  700. END(irq_entries_start)
  701. .previous
  702. END(interrupt)
  703. .previous
  704. /*
  705. * Interrupt entry/exit.
  706. *
  707. * Interrupt entry points save only callee-clobbered registers in the fast path.
  708. *
  709. * Entry runs with interrupts off.
  710. */
  711. /* 0(%rsp): ~(interrupt number) */
  712. .macro interrupt func
  713. subq $10*8, %rsp
  714. CFI_ADJUST_CFA_OFFSET 10*8
  715. call save_args
  716. PARTIAL_FRAME 0
  717. call \func
  718. .endm
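/*
 * The 10*8 bytes reserved above are exactly the slots save_args fills in:
 * the nine caller-saved registers (rdi, rsi, rdx, rcx, rax, r8-r11) plus
 * the saved %rbp used as the back link to the interrupted frame.
 */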
  719. /*
  720. * The interrupt stubs push (~vector+0x80) onto the stack and
  721. * then jump to common_interrupt.
  722. */
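/*
 * Worked example (assuming FIRST_EXTERNAL_VECTOR is 0x20): the stub for
 * vector 0x20 pushes ~0x20+0x80 = 0x5f, which fits a signed 8-bit
 * immediate; the addq $-0x80 below turns that back into ~0x20, so the
 * saved orig_rax is in the [-256,-1] range and do_IRQ recovers the vector
 * with a bitwise NOT.
 */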
  723. .p2align CONFIG_X86_L1_CACHE_SHIFT
  724. common_interrupt:
  725. XCPT_FRAME
  726. addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
  727. interrupt do_IRQ
  728. /* 0(%rsp): oldrsp-ARGOFFSET */
  729. ret_from_intr:
  730. DISABLE_INTERRUPTS(CLBR_NONE)
  731. TRACE_IRQS_OFF
  732. decl %gs:pda_irqcount
  733. leaveq
  734. CFI_DEF_CFA_REGISTER rsp
  735. CFI_ADJUST_CFA_OFFSET -8
  736. exit_intr:
  737. GET_THREAD_INFO(%rcx)
  738. testl $3,CS-ARGOFFSET(%rsp)
  739. je retint_kernel
  740. /* Interrupt came from user space */
  741. /*
  742. * Has a correct top of stack, but a partial stack frame
  743. * %rcx: thread info. Interrupts off.
  744. */
  745. retint_with_reschedule:
  746. movl $_TIF_WORK_MASK,%edi
  747. retint_check:
  748. LOCKDEP_SYS_EXIT_IRQ
  749. movl TI_flags(%rcx),%edx
  750. andl %edi,%edx
  751. CFI_REMEMBER_STATE
  752. jnz retint_careful
  753. retint_swapgs: /* return to user-space */
  754. /*
  755. * The iretq could re-enable interrupts:
  756. */
  757. DISABLE_INTERRUPTS(CLBR_ANY)
  758. TRACE_IRQS_IRETQ
  759. SWAPGS
  760. jmp restore_args
  761. retint_restore_args: /* return to kernel space */
  762. DISABLE_INTERRUPTS(CLBR_ANY)
  763. /*
  764. * The iretq could re-enable interrupts:
  765. */
  766. TRACE_IRQS_IRETQ
  767. restore_args:
  768. RESTORE_ARGS 0,8,0
  769. irq_return:
  770. INTERRUPT_RETURN
  771. .section __ex_table, "a"
  772. .quad irq_return, bad_iret
  773. .previous
  774. #ifdef CONFIG_PARAVIRT
  775. ENTRY(native_iret)
  776. iretq
  777. .section __ex_table,"a"
  778. .quad native_iret, bad_iret
  779. .previous
  780. #endif
  781. .section .fixup,"ax"
  782. bad_iret:
  783. /*
  784. * The iret traps when the %cs or %ss being restored is bogus.
  785. * We've lost the original trap vector and error code.
  786. * #GPF is the most likely one to get for an invalid selector.
  787. * So pretend we completed the iret and took the #GPF in user mode.
  788. *
  789. * We are now running with the kernel GS after exception recovery.
  790. * But error_entry expects us to have user GS to match the user %cs,
  791. * so swap back.
  792. */
  793. pushq $0
  794. SWAPGS
  795. jmp general_protection
  796. .previous
  797. /* edi: workmask, edx: work */
  798. retint_careful:
  799. CFI_RESTORE_STATE
  800. bt $TIF_NEED_RESCHED,%edx
  801. jnc retint_signal
  802. TRACE_IRQS_ON
  803. ENABLE_INTERRUPTS(CLBR_NONE)
  804. pushq %rdi
  805. CFI_ADJUST_CFA_OFFSET 8
  806. call schedule
  807. popq %rdi
  808. CFI_ADJUST_CFA_OFFSET -8
  809. GET_THREAD_INFO(%rcx)
  810. DISABLE_INTERRUPTS(CLBR_NONE)
  811. TRACE_IRQS_OFF
  812. jmp retint_check
  813. retint_signal:
  814. testl $_TIF_DO_NOTIFY_MASK,%edx
  815. jz retint_swapgs
  816. TRACE_IRQS_ON
  817. ENABLE_INTERRUPTS(CLBR_NONE)
  818. SAVE_REST
  819. movq $-1,ORIG_RAX(%rsp)
  820. xorl %esi,%esi # oldset
  821. movq %rsp,%rdi # &pt_regs
  822. call do_notify_resume
  823. RESTORE_REST
  824. DISABLE_INTERRUPTS(CLBR_NONE)
  825. TRACE_IRQS_OFF
  826. GET_THREAD_INFO(%rcx)
  827. jmp retint_with_reschedule
  828. #ifdef CONFIG_PREEMPT
  829. /* Returning to kernel space. Check if we need preemption */
  830. /* rcx: threadinfo. interrupts off. */
  831. ENTRY(retint_kernel)
  832. cmpl $0,TI_preempt_count(%rcx)
  833. jnz retint_restore_args
  834. bt $TIF_NEED_RESCHED,TI_flags(%rcx)
  835. jnc retint_restore_args
  836. bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
  837. jnc retint_restore_args
  838. call preempt_schedule_irq
  839. jmp exit_intr
  840. #endif
  841. CFI_ENDPROC
  842. END(common_interrupt)
  843. /*
  844. * APIC interrupts.
  845. */
  846. .macro apicinterrupt num sym do_sym
  847. ENTRY(\sym)
  848. INTR_FRAME
  849. pushq $~(\num)
  850. CFI_ADJUST_CFA_OFFSET 8
  851. interrupt \do_sym
  852. jmp ret_from_intr
  853. CFI_ENDPROC
  854. END(\sym)
  855. .endm
  856. #ifdef CONFIG_SMP
  857. apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
  858. irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
  859. #endif
  860. apicinterrupt UV_BAU_MESSAGE \
  861. uv_bau_message_intr1 uv_bau_message_interrupt
  862. apicinterrupt LOCAL_TIMER_VECTOR \
  863. apic_timer_interrupt smp_apic_timer_interrupt
  864. #ifdef CONFIG_SMP
  865. apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
  866. invalidate_interrupt0 smp_invalidate_interrupt
  867. apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
  868. invalidate_interrupt1 smp_invalidate_interrupt
  869. apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
  870. invalidate_interrupt2 smp_invalidate_interrupt
  871. apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
  872. invalidate_interrupt3 smp_invalidate_interrupt
  873. apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
  874. invalidate_interrupt4 smp_invalidate_interrupt
  875. apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
  876. invalidate_interrupt5 smp_invalidate_interrupt
  877. apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
  878. invalidate_interrupt6 smp_invalidate_interrupt
  879. apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
  880. invalidate_interrupt7 smp_invalidate_interrupt
  881. #endif
  882. apicinterrupt THRESHOLD_APIC_VECTOR \
  883. threshold_interrupt mce_threshold_interrupt
  884. apicinterrupt THERMAL_APIC_VECTOR \
  885. thermal_interrupt smp_thermal_interrupt
  886. #ifdef CONFIG_SMP
  887. apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
  888. call_function_single_interrupt smp_call_function_single_interrupt
  889. apicinterrupt CALL_FUNCTION_VECTOR \
  890. call_function_interrupt smp_call_function_interrupt
  891. apicinterrupt RESCHEDULE_VECTOR \
  892. reschedule_interrupt smp_reschedule_interrupt
  893. #endif
  894. apicinterrupt ERROR_APIC_VECTOR \
  895. error_interrupt smp_error_interrupt
  896. apicinterrupt SPURIOUS_APIC_VECTOR \
  897. spurious_interrupt smp_spurious_interrupt
  898. #ifdef CONFIG_PERF_COUNTERS
  899. apicinterrupt LOCAL_PERF_VECTOR \
  900. perf_counter_interrupt smp_perf_counter_interrupt
  901. #endif
  902. /*
  903. * Exception entry points.
  904. */
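/*
 * Three flavors follow, differing only in how the error code and %gs are
 * handled:
 *   - zeroentry: the CPU pushed no error code, so a -1 ORIG_RAX is pushed
 *     by hand and error_entry/error_exit do the swapgs bookkeeping.
 *   - errorentry: the CPU already pushed an error code; otherwise as above.
 *   - paranoidzeroentry/paranoiderrorentry: use save_paranoid, which
 *     inspects MSR_GS_BASE to decide whether swapgs is needed and reports
 *     that in %ebx for paranoid_exit.
 */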
  905. .macro zeroentry sym do_sym
  906. ENTRY(\sym)
  907. INTR_FRAME
  908. PARAVIRT_ADJUST_EXCEPTION_FRAME
  909. pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
  910. subq $15*8,%rsp
  911. CFI_ADJUST_CFA_OFFSET 15*8
  912. call error_entry
  913. DEFAULT_FRAME 0
  914. movq %rsp,%rdi /* pt_regs pointer */
  915. xorl %esi,%esi /* no error code */
  916. call \do_sym
  917. jmp error_exit /* %ebx: no swapgs flag */
  918. CFI_ENDPROC
  919. END(\sym)
  920. .endm
  921. .macro paranoidzeroentry sym do_sym
  922. ENTRY(\sym)
  923. INTR_FRAME
  924. PARAVIRT_ADJUST_EXCEPTION_FRAME
  925. pushq $-1 /* ORIG_RAX: no syscall to restart */
  926. CFI_ADJUST_CFA_OFFSET 8
  927. subq $15*8, %rsp
  928. call save_paranoid
  929. TRACE_IRQS_OFF
  930. movq %rsp,%rdi /* pt_regs pointer */
  931. xorl %esi,%esi /* no error code */
  932. call \do_sym
  933. jmp paranoid_exit /* %ebx: no swapgs flag */
  934. CFI_ENDPROC
  935. END(\sym)
  936. .endm
  937. .macro paranoidzeroentry_ist sym do_sym ist
  938. ENTRY(\sym)
  939. INTR_FRAME
  940. PARAVIRT_ADJUST_EXCEPTION_FRAME
  941. pushq $-1 /* ORIG_RAX: no syscall to restart */
  942. CFI_ADJUST_CFA_OFFSET 8
  943. subq $15*8, %rsp
  944. call save_paranoid
  945. TRACE_IRQS_OFF
  946. movq %rsp,%rdi /* pt_regs pointer */
  947. xorl %esi,%esi /* no error code */
  948. movq %gs:pda_data_offset, %rbp
  949. subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  950. call \do_sym
  951. addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  952. jmp paranoid_exit /* %ebx: no swapgs flag */
  953. CFI_ENDPROC
  954. END(\sym)
  955. .endm
  956. .macro errorentry sym do_sym
  957. ENTRY(\sym)
  958. XCPT_FRAME
  959. PARAVIRT_ADJUST_EXCEPTION_FRAME
  960. subq $15*8,%rsp
  961. CFI_ADJUST_CFA_OFFSET 15*8
  962. call error_entry
  963. DEFAULT_FRAME 0
  964. movq %rsp,%rdi /* pt_regs pointer */
  965. movq ORIG_RAX(%rsp),%rsi /* get error code */
  966. movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
  967. call \do_sym
  968. jmp error_exit /* %ebx: no swapgs flag */
  969. CFI_ENDPROC
  970. END(\sym)
  971. .endm
  972. /* error code is on the stack already */
  973. .macro paranoiderrorentry sym do_sym
  974. ENTRY(\sym)
  975. XCPT_FRAME
  976. PARAVIRT_ADJUST_EXCEPTION_FRAME
  977. subq $15*8,%rsp
  978. CFI_ADJUST_CFA_OFFSET 15*8
  979. call save_paranoid
  980. DEFAULT_FRAME 0
  981. TRACE_IRQS_OFF
  982. movq %rsp,%rdi /* pt_regs pointer */
  983. movq ORIG_RAX(%rsp),%rsi /* get error code */
  984. movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
  985. call \do_sym
  986. jmp paranoid_exit /* %ebx: no swapgs flag */
  987. CFI_ENDPROC
  988. END(\sym)
  989. .endm
  990. zeroentry divide_error do_divide_error
  991. zeroentry overflow do_overflow
  992. zeroentry bounds do_bounds
  993. zeroentry invalid_op do_invalid_op
  994. zeroentry device_not_available do_device_not_available
  995. paranoiderrorentry double_fault do_double_fault
  996. zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
  997. errorentry invalid_TSS do_invalid_TSS
  998. errorentry segment_not_present do_segment_not_present
  999. zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
  1000. zeroentry coprocessor_error do_coprocessor_error
  1001. errorentry alignment_check do_alignment_check
  1002. zeroentry simd_coprocessor_error do_simd_coprocessor_error
  1003. /* Reload gs selector with exception handling */
  1004. /* edi: new selector */
  1005. ENTRY(native_load_gs_index)
  1006. CFI_STARTPROC
  1007. pushf
  1008. CFI_ADJUST_CFA_OFFSET 8
  1009. DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
  1010. SWAPGS
  1011. gs_change:
  1012. movl %edi,%gs
  1013. 2: mfence /* workaround */
  1014. SWAPGS
  1015. popf
  1016. CFI_ADJUST_CFA_OFFSET -8
  1017. ret
  1018. CFI_ENDPROC
  1019. END(native_load_gs_index)
  1020. .section __ex_table,"a"
  1021. .align 8
  1022. .quad gs_change,bad_gs
  1023. .previous
  1024. .section .fixup,"ax"
  1025. /* running with kernelgs */
  1026. bad_gs:
  1027. SWAPGS /* switch back to user gs */
  1028. xorl %eax,%eax
  1029. movl %eax,%gs
  1030. jmp 2b
  1031. .previous
  1032. /*
  1033. * Create a kernel thread.
  1034. *
  1035. * C extern interface:
  1036. * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
  1037. *
  1038. * asm input arguments:
  1039. * rdi: fn, rsi: arg, rdx: flags
  1040. */
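/*
 * In short: a fake interrupt frame is built with child_rip as the
 * "return" address, do_fork() is called with the caller's flags or-ed
 * with kernel_thread_flags, and the child eventually executes child_rip,
 * which calls fn(arg) and then do_exit() with its return value.
 */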
  1041. ENTRY(kernel_thread)
  1042. CFI_STARTPROC
  1043. FAKE_STACK_FRAME $child_rip
  1044. SAVE_ALL
  1045. # rdi: flags, rsi: usp, rdx: will be &pt_regs
  1046. movq %rdx,%rdi
  1047. orq kernel_thread_flags(%rip),%rdi
  1048. movq $-1, %rsi
  1049. movq %rsp, %rdx
  1050. xorl %r8d,%r8d
  1051. xorl %r9d,%r9d
  1052. # clone now
  1053. call do_fork
  1054. movq %rax,RAX(%rsp)
  1055. xorl %edi,%edi
  1056. /*
  1057. * It isn't worth checking for a reschedule here,
  1058. * so internally to the x86_64 port you can rely on kernel_thread()
  1059. * not rescheduling the child before returning; this avoids the need
  1060. * for hacks, for example to fork off the per-CPU idle tasks.
  1061. * [Hopefully no generic code relies on the reschedule -AK]
  1062. */
  1063. RESTORE_ALL
  1064. UNFAKE_STACK_FRAME
  1065. ret
  1066. CFI_ENDPROC
  1067. END(kernel_thread)
  1068. ENTRY(child_rip)
  1069. pushq $0 # fake return address
  1070. CFI_STARTPROC
  1071. /*
  1072. * Here we are in the child and the registers are set as they were
  1073. * at kernel_thread() invocation in the parent.
  1074. */
  1075. movq %rdi, %rax
  1076. movq %rsi, %rdi
  1077. call *%rax
  1078. # exit
  1079. mov %eax, %edi
  1080. call do_exit
  1081. ud2 # padding for call trace
  1082. CFI_ENDPROC
  1083. END(child_rip)
  1084. /*
  1085. * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
  1086. *
  1087. * C extern interface:
  1088. * extern long execve(char *name, char **argv, char **envp)
  1089. *
  1090. * asm input arguments:
  1091. * rdi: name, rsi: argv, rdx: envp
  1092. *
  1093. * We want to fall back into:
  1094. * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
  1095. *
  1096. * do_sys_execve asm fallback arguments:
  1097. * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
  1098. */
  1099. ENTRY(kernel_execve)
  1100. CFI_STARTPROC
  1101. FAKE_STACK_FRAME $0
  1102. SAVE_ALL
  1103. movq %rsp,%rcx
  1104. call sys_execve
  1105. movq %rax, RAX(%rsp)
  1106. RESTORE_REST
  1107. testq %rax,%rax
  1108. je int_ret_from_sys_call
  1109. RESTORE_ARGS
  1110. UNFAKE_STACK_FRAME
  1111. ret
  1112. CFI_ENDPROC
  1113. END(kernel_execve)
  1114. /* Call softirq on interrupt stack. Interrupts are off. */
  1115. ENTRY(call_softirq)
  1116. CFI_STARTPROC
  1117. push %rbp
  1118. CFI_ADJUST_CFA_OFFSET 8
  1119. CFI_REL_OFFSET rbp,0
  1120. mov %rsp,%rbp
  1121. CFI_DEF_CFA_REGISTER rbp
  1122. incl %gs:pda_irqcount
  1123. cmove %gs:pda_irqstackptr,%rsp
  1124. push %rbp # backlink for old unwinder
  1125. call __do_softirq
  1126. leaveq
  1127. CFI_DEF_CFA_REGISTER rsp
  1128. CFI_ADJUST_CFA_OFFSET -8
  1129. decl %gs:pda_irqcount
  1130. ret
  1131. CFI_ENDPROC
  1132. END(call_softirq)
  1133. #ifdef CONFIG_XEN
  1134. zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
  1135. /*
  1136. * A note on the "critical region" in our callback handler.
  1137. * We want to avoid stacking callback handlers due to events occurring
  1138. * during handling of the last event. To do this, we keep events disabled
  1139. * until we've done all processing. HOWEVER, we must enable events before
  1140. * popping the stack frame (can't be done atomically) and so it would still
  1141. * be possible to get enough handler activations to overflow the stack.
  1142. * Although unlikely, bugs of that kind are hard to track down, so we'd
  1143. * like to avoid the possibility.
  1144. * So, on entry to the handler we detect whether we interrupted an
  1145. * existing activation in its critical region -- if so, we pop the current
  1146. * activation and restart the handler using the previous one.
  1147. */
  1148. ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
  1149. CFI_STARTPROC
  1150. /*
  1151. * Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
  1152. * see the correct pointer to the pt_regs.
  1153. */
  1154. movq %rdi, %rsp # we don't return, adjust the stack frame
  1155. CFI_ENDPROC
  1156. DEFAULT_FRAME
  1157. 11: incl %gs:pda_irqcount
  1158. movq %rsp,%rbp
  1159. CFI_DEF_CFA_REGISTER rbp
  1160. cmovzq %gs:pda_irqstackptr,%rsp
  1161. pushq %rbp # backlink for old unwinder
  1162. call xen_evtchn_do_upcall
  1163. popq %rsp
  1164. CFI_DEF_CFA_REGISTER rsp
  1165. decl %gs:pda_irqcount
  1166. jmp error_exit
  1167. CFI_ENDPROC
  1168. END(xen_do_hypervisor_callback)
  1169. /*
  1170. * Hypervisor uses this for application faults while it executes.
  1171. * We get here for two reasons:
  1172. * 1. Fault while reloading DS, ES, FS or GS
  1173. * 2. Fault while executing IRET
  1174. * Category 1 we do not need to fix up as Xen has already reloaded all segment
  1175. * registers that could be reloaded and zeroed the others.
  1176. * Category 2 we fix up by killing the current process. We cannot use the
  1177. * normal Linux return path in this case because if we use the IRET hypercall
  1178. * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
  1179. * We distinguish between categories by comparing each saved segment register
  1180. * with its current contents: any discrepancy means we are in category 1.
  1181. */
  1182. ENTRY(xen_failsafe_callback)
  1183. INTR_FRAME 1 (6*8)
  1184. /*CFI_REL_OFFSET gs,GS*/
  1185. /*CFI_REL_OFFSET fs,FS*/
  1186. /*CFI_REL_OFFSET es,ES*/
  1187. /*CFI_REL_OFFSET ds,DS*/
  1188. CFI_REL_OFFSET r11,8
  1189. CFI_REL_OFFSET rcx,0
  1190. movw %ds,%cx
  1191. cmpw %cx,0x10(%rsp)
  1192. CFI_REMEMBER_STATE
  1193. jne 1f
  1194. movw %es,%cx
  1195. cmpw %cx,0x18(%rsp)
  1196. jne 1f
  1197. movw %fs,%cx
  1198. cmpw %cx,0x20(%rsp)
  1199. jne 1f
  1200. movw %gs,%cx
  1201. cmpw %cx,0x28(%rsp)
  1202. jne 1f
  1203. /* All segments match their saved values => Category 2 (Bad IRET). */
  1204. movq (%rsp),%rcx
  1205. CFI_RESTORE rcx
  1206. movq 8(%rsp),%r11
  1207. CFI_RESTORE r11
  1208. addq $0x30,%rsp
  1209. CFI_ADJUST_CFA_OFFSET -0x30
  1210. pushq_cfi $0 /* RIP */
  1211. pushq_cfi %r11
  1212. pushq_cfi %rcx
  1213. jmp general_protection
  1214. CFI_RESTORE_STATE
  1215. 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
  1216. movq (%rsp),%rcx
  1217. CFI_RESTORE rcx
  1218. movq 8(%rsp),%r11
  1219. CFI_RESTORE r11
  1220. addq $0x30,%rsp
  1221. CFI_ADJUST_CFA_OFFSET -0x30
  1222. pushq_cfi $0
  1223. SAVE_ALL
  1224. jmp error_exit
  1225. CFI_ENDPROC
  1226. END(xen_failsafe_callback)
  1227. #endif /* CONFIG_XEN */
  1228. /*
  1229. * Some functions should be protected against kprobes
  1230. */
  1231. .pushsection .kprobes.text, "ax"
  1232. paranoidzeroentry_ist debug do_debug DEBUG_STACK
  1233. paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
  1234. paranoiderrorentry stack_segment do_stack_segment
  1235. errorentry general_protection do_general_protection
  1236. errorentry page_fault do_page_fault
  1237. #ifdef CONFIG_X86_MCE
  1238. paranoidzeroentry machine_check do_machine_check
  1239. #endif
  1240. /*
  1241. * "Paranoid" exit path from exception stack.
  1242. * Paranoid because this is used by NMIs and cannot take
  1243. * any kernel state for granted.
  1244. * We don't do kernel preemption checks here, because only
  1245. * NMI should be common and it does not enable IRQs and
  1246. * cannot get reschedule ticks.
  1247. *
  1248. * "trace" is 0 for the NMI handler only, because irq-tracing
  1249. * is fundamentally NMI-unsafe. (we cannot change the soft and
  1250. * hard flags at once, atomically)
  1251. */
  1252. /* ebx: no swapgs flag */
  1253. ENTRY(paranoid_exit)
  1254. INTR_FRAME
  1255. DISABLE_INTERRUPTS(CLBR_NONE)
  1256. TRACE_IRQS_OFF
  1257. testl %ebx,%ebx /* swapgs needed? */
  1258. jnz paranoid_restore
  1259. testl $3,CS(%rsp)
  1260. jnz paranoid_userspace
  1261. paranoid_swapgs:
  1262. TRACE_IRQS_IRETQ 0
  1263. SWAPGS_UNSAFE_STACK
  1264. paranoid_restore:
  1265. RESTORE_ALL 8
  1266. jmp irq_return
  1267. paranoid_userspace:
  1268. GET_THREAD_INFO(%rcx)
  1269. movl TI_flags(%rcx),%ebx
  1270. andl $_TIF_WORK_MASK,%ebx
  1271. jz paranoid_swapgs
  1272. movq %rsp,%rdi /* &pt_regs */
  1273. call sync_regs
  1274. movq %rax,%rsp /* switch stack for scheduling */
  1275. testl $_TIF_NEED_RESCHED,%ebx
  1276. jnz paranoid_schedule
  1277. movl %ebx,%edx /* arg3: thread flags */
  1278. TRACE_IRQS_ON
  1279. ENABLE_INTERRUPTS(CLBR_NONE)
  1280. xorl %esi,%esi /* arg2: oldset */
  1281. movq %rsp,%rdi /* arg1: &pt_regs */
  1282. call do_notify_resume
  1283. DISABLE_INTERRUPTS(CLBR_NONE)
  1284. TRACE_IRQS_OFF
  1285. jmp paranoid_userspace
  1286. paranoid_schedule:
  1287. TRACE_IRQS_ON
  1288. ENABLE_INTERRUPTS(CLBR_ANY)
  1289. call schedule
  1290. DISABLE_INTERRUPTS(CLBR_ANY)
  1291. TRACE_IRQS_OFF
  1292. jmp paranoid_userspace
  1293. CFI_ENDPROC
  1294. END(paranoid_exit)
  1295. /*
  1296. * Exception entry point. This expects an error code/orig_rax on the stack.
  1297. * Returns the "no swapgs" flag in %ebx.
  1298. */
  1299. ENTRY(error_entry)
  1300. XCPT_FRAME
  1301. CFI_ADJUST_CFA_OFFSET 15*8
  1302. /* oldrax contains error code */
  1303. cld
  1304. movq_cfi rdi, RDI+8
  1305. movq_cfi rsi, RSI+8
  1306. movq_cfi rdx, RDX+8
  1307. movq_cfi rcx, RCX+8
  1308. movq_cfi rax, RAX+8
  1309. movq_cfi r8, R8+8
  1310. movq_cfi r9, R9+8
  1311. movq_cfi r10, R10+8
  1312. movq_cfi r11, R11+8
  1313. movq_cfi rbx, RBX+8
  1314. movq_cfi rbp, RBP+8
  1315. movq_cfi r12, R12+8
  1316. movq_cfi r13, R13+8
  1317. movq_cfi r14, R14+8
  1318. movq_cfi r15, R15+8
  1319. xorl %ebx,%ebx
  1320. testl $3,CS+8(%rsp)
  1321. je error_kernelspace
  1322. error_swapgs:
  1323. SWAPGS
  1324. error_sti:
  1325. TRACE_IRQS_OFF
  1326. ret
  1327. CFI_ENDPROC
  1328. /*
  1329. * There are two places in the kernel that can potentially fault with
  1330. * usergs. Handle them here. The exception handlers after iret run with
  1331. * kernel gs again, so don't set the user space flag. B stepping K8s
  1332. * sometimes report a truncated RIP for IRET exceptions returning to
  1333. * compat mode. Check for these here too.
  1334. */
  1335. error_kernelspace:
  1336. incl %ebx
  1337. leaq irq_return(%rip),%rcx
  1338. cmpq %rcx,RIP+8(%rsp)
  1339. je error_swapgs
  1340. movl %ecx,%ecx /* zero extend */
  1341. cmpq %rcx,RIP+8(%rsp)
  1342. je error_swapgs
  1343. cmpq $gs_change,RIP+8(%rsp)
  1344. je error_swapgs
  1345. jmp error_sti
  1346. END(error_entry)
  1347. /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
  1348. ENTRY(error_exit)
  1349. DEFAULT_FRAME
  1350. movl %ebx,%eax
  1351. RESTORE_REST
  1352. DISABLE_INTERRUPTS(CLBR_NONE)
  1353. TRACE_IRQS_OFF
  1354. GET_THREAD_INFO(%rcx)
  1355. testl %eax,%eax
  1356. jne retint_kernel
  1357. LOCKDEP_SYS_EXIT_IRQ
  1358. movl TI_flags(%rcx),%edx
  1359. movl $_TIF_WORK_MASK,%edi
  1360. andl %edi,%edx
  1361. jnz retint_careful
  1362. jmp retint_swapgs
  1363. CFI_ENDPROC
  1364. END(error_exit)
  1365. /* runs on exception stack */
  1366. ENTRY(nmi)
  1367. INTR_FRAME
  1368. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1369. pushq_cfi $-1
  1370. subq $15*8, %rsp
  1371. CFI_ADJUST_CFA_OFFSET 15*8
  1372. call save_paranoid
  1373. DEFAULT_FRAME 0
  1374. /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
  1375. movq %rsp,%rdi
  1376. movq $-1,%rsi
  1377. call do_nmi
  1378. #ifdef CONFIG_TRACE_IRQFLAGS
  1379. /* paranoidexit; without TRACE_IRQS_OFF */
  1380. /* ebx: no swapgs flag */
  1381. DISABLE_INTERRUPTS(CLBR_NONE)
  1382. testl %ebx,%ebx /* swapgs needed? */
  1383. jnz nmi_restore
  1384. testl $3,CS(%rsp)
  1385. jnz nmi_userspace
  1386. nmi_swapgs:
  1387. SWAPGS_UNSAFE_STACK
  1388. nmi_restore:
  1389. RESTORE_ALL 8
  1390. jmp irq_return
  1391. nmi_userspace:
  1392. GET_THREAD_INFO(%rcx)
  1393. movl TI_flags(%rcx),%ebx
  1394. andl $_TIF_WORK_MASK,%ebx
  1395. jz nmi_swapgs
  1396. movq %rsp,%rdi /* &pt_regs */
  1397. call sync_regs
  1398. movq %rax,%rsp /* switch stack for scheduling */
  1399. testl $_TIF_NEED_RESCHED,%ebx
  1400. jnz nmi_schedule
  1401. movl %ebx,%edx /* arg3: thread flags */
  1402. ENABLE_INTERRUPTS(CLBR_NONE)
  1403. xorl %esi,%esi /* arg2: oldset */
  1404. movq %rsp,%rdi /* arg1: &pt_regs */
  1405. call do_notify_resume
  1406. DISABLE_INTERRUPTS(CLBR_NONE)
  1407. jmp nmi_userspace
  1408. nmi_schedule:
  1409. ENABLE_INTERRUPTS(CLBR_ANY)
  1410. call schedule
  1411. DISABLE_INTERRUPTS(CLBR_ANY)
  1412. jmp nmi_userspace
  1413. CFI_ENDPROC
  1414. #else
  1415. jmp paranoid_exit
  1416. CFI_ENDPROC
  1417. #endif
  1418. END(nmi)
  1419. ENTRY(ignore_sysret)
  1420. CFI_STARTPROC
  1421. mov $-ENOSYS,%eax
  1422. sysret
  1423. CFI_ENDPROC
  1424. END(ignore_sysret)
  1425. /*
  1426. * End of kprobes section
  1427. */
  1428. .popsection