entry_64.S

  1. /*
  2. * linux/arch/x86_64/entry.S
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
  6. * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
  7. */
  8. /*
  9. * entry.S contains the system-call and fault low-level handling routines.
  10. *
  11. * NOTE: This code handles signal-recognition, which happens every time
  12. * after an interrupt and after each system call.
  13. *
  14. * Normal syscalls and interrupts don't save a full stack frame; this is
  15. * only done for syscall tracing, signals or fork/exec et al.
  16. *
  17. * A note on terminology:
  18. * - top of stack: Architecture defined interrupt frame from SS to RIP
  19. * at the top of the kernel process stack.
  20. * - partial stack frame: partially saved registers up to R11.
  21. * - full stack frame: like the partial stack frame, but with all registers saved.
  22. *
  23. * Some macro usage:
  24. * - CFI macros are used to generate dwarf2 unwind information for better
  25. * backtraces. They don't change any code.
  26. * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
  27. * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
  28. * There are unfortunately lots of special cases where some registers are
  29. * not touched. The macro is a big mess that should be cleaned up.
  30. * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
  31. * Gives a full stack frame.
  32. * - ENTRY/END - Define functions in the symbol table.
  33. * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  34. * frame that is otherwise undefined after a SYSCALL
  35. * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
  36. * - errorentry/paranoidentry/zeroentry - Define exception entry points.
  37. */
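/*
 * Illustrative note (added, not in the original source): the "top of stack"
 * above is the hardware frame the CPU pushes on an interrupt or exception.
 * From higher to lower addresses it looks roughly like
 *
 *	SS, RSP, EFLAGS, CS, RIP
 *
 * followed in pt_regs by ORIG_RAX and the general purpose registers saved
 * by SAVE_ARGS/SAVE_REST. The RIP/CS/EFLAGS/RSP/SS and ORIG_RAX offsets
 * used throughout this file are generated by asm-offsets.
 */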
  38. #include <linux/linkage.h>
  39. #include <asm/segment.h>
  40. #include <asm/cache.h>
  41. #include <asm/errno.h>
  42. #include <asm/dwarf2.h>
  43. #include <asm/calling.h>
  44. #include <asm/asm-offsets.h>
  45. #include <asm/msr.h>
  46. #include <asm/unistd.h>
  47. #include <asm/thread_info.h>
  48. #include <asm/hw_irq.h>
  49. #include <asm/page.h>
  50. #include <asm/irqflags.h>
  51. #include <asm/paravirt.h>
  52. #include <asm/ftrace.h>
  53. /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
  54. #include <linux/elf-em.h>
  55. #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
  56. #define __AUDIT_ARCH_64BIT 0x80000000
  57. #define __AUDIT_ARCH_LE 0x40000000
  58. .code64
  59. #ifdef CONFIG_FUNCTION_TRACER
  60. #ifdef CONFIG_DYNAMIC_FTRACE
  61. ENTRY(mcount)
  62. retq
  63. END(mcount)
  64. ENTRY(ftrace_caller)
  65. cmpl $0, function_trace_stop
  66. jne ftrace_stub
  67. /* taken from glibc */
  68. subq $0x38, %rsp
  69. movq %rax, (%rsp)
  70. movq %rcx, 8(%rsp)
  71. movq %rdx, 16(%rsp)
  72. movq %rsi, 24(%rsp)
  73. movq %rdi, 32(%rsp)
  74. movq %r8, 40(%rsp)
  75. movq %r9, 48(%rsp)
  76. movq 0x38(%rsp), %rdi
  77. movq 8(%rbp), %rsi
  78. subq $MCOUNT_INSN_SIZE, %rdi
  79. .globl ftrace_call
  80. ftrace_call:
  81. call ftrace_stub
  82. movq 48(%rsp), %r9
  83. movq 40(%rsp), %r8
  84. movq 32(%rsp), %rdi
  85. movq 24(%rsp), %rsi
  86. movq 16(%rsp), %rdx
  87. movq 8(%rsp), %rcx
  88. movq (%rsp), %rax
  89. addq $0x38, %rsp
  90. #ifdef CONFIG_FUNCTION_GRAPH_TRACER
  91. .globl ftrace_graph_call
  92. ftrace_graph_call:
  93. jmp ftrace_stub
  94. #endif
  95. .globl ftrace_stub
  96. ftrace_stub:
  97. retq
  98. END(ftrace_caller)
  99. #else /* ! CONFIG_DYNAMIC_FTRACE */
  100. ENTRY(mcount)
  101. cmpl $0, function_trace_stop
  102. jne ftrace_stub
  103. cmpq $ftrace_stub, ftrace_trace_function
  104. jnz trace
  105. #ifdef CONFIG_FUNCTION_GRAPH_TRACER
  106. cmpq $ftrace_stub, ftrace_graph_return
  107. jnz ftrace_graph_caller
  108. cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
  109. jnz ftrace_graph_caller
  110. #endif
  111. .globl ftrace_stub
  112. ftrace_stub:
  113. retq
  114. trace:
  115. /* taken from glibc */
  116. subq $0x38, %rsp
  117. movq %rax, (%rsp)
  118. movq %rcx, 8(%rsp)
  119. movq %rdx, 16(%rsp)
  120. movq %rsi, 24(%rsp)
  121. movq %rdi, 32(%rsp)
  122. movq %r8, 40(%rsp)
  123. movq %r9, 48(%rsp)
  124. movq 0x38(%rsp), %rdi
  125. movq 8(%rbp), %rsi
  126. subq $MCOUNT_INSN_SIZE, %rdi
  127. call *ftrace_trace_function
  128. movq 48(%rsp), %r9
  129. movq 40(%rsp), %r8
  130. movq 32(%rsp), %rdi
  131. movq 24(%rsp), %rsi
  132. movq 16(%rsp), %rdx
  133. movq 8(%rsp), %rcx
  134. movq (%rsp), %rax
  135. addq $0x38, %rsp
  136. jmp ftrace_stub
  137. END(mcount)
  138. #endif /* CONFIG_DYNAMIC_FTRACE */
  139. #endif /* CONFIG_FUNCTION_TRACER */
  140. #ifdef CONFIG_FUNCTION_GRAPH_TRACER
  141. ENTRY(ftrace_graph_caller)
  142. cmpl $0, function_trace_stop
  143. jne ftrace_stub
  144. subq $0x38, %rsp
  145. movq %rax, (%rsp)
  146. movq %rcx, 8(%rsp)
  147. movq %rdx, 16(%rsp)
  148. movq %rsi, 24(%rsp)
  149. movq %rdi, 32(%rsp)
  150. movq %r8, 40(%rsp)
  151. movq %r9, 48(%rsp)
  152. leaq 8(%rbp), %rdi
  153. movq 0x38(%rsp), %rsi
  154. subq $MCOUNT_INSN_SIZE, %rsi
  155. call prepare_ftrace_return
  156. movq 48(%rsp), %r9
  157. movq 40(%rsp), %r8
  158. movq 32(%rsp), %rdi
  159. movq 24(%rsp), %rsi
  160. movq 16(%rsp), %rdx
  161. movq 8(%rsp), %rcx
  162. movq (%rsp), %rax
  163. addq $0x38, %rsp
  164. retq
  165. END(ftrace_graph_caller)
  166. .globl return_to_handler
  167. return_to_handler:
  168. subq $80, %rsp
  169. movq %rax, (%rsp)
  170. movq %rcx, 8(%rsp)
  171. movq %rdx, 16(%rsp)
  172. movq %rsi, 24(%rsp)
  173. movq %rdi, 32(%rsp)
  174. movq %r8, 40(%rsp)
  175. movq %r9, 48(%rsp)
  176. movq %r10, 56(%rsp)
  177. movq %r11, 64(%rsp)
  178. call ftrace_return_to_handler
  179. movq %rax, 72(%rsp)
  180. movq 64(%rsp), %r11
  181. movq 56(%rsp), %r10
  182. movq 48(%rsp), %r9
  183. movq 40(%rsp), %r8
  184. movq 32(%rsp), %rdi
  185. movq 24(%rsp), %rsi
  186. movq 16(%rsp), %rdx
  187. movq 8(%rsp), %rcx
  188. movq (%rsp), %rax
  189. addq $72, %rsp
  190. retq
  191. #endif
  192. #ifndef CONFIG_PREEMPT
  193. #define retint_kernel retint_restore_args
  194. #endif
  195. #ifdef CONFIG_PARAVIRT
  196. ENTRY(native_usergs_sysret64)
  197. swapgs
  198. sysretq
  199. #endif /* CONFIG_PARAVIRT */
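/*
 * Note (added): USERGS_SYSRET64, used on the syscall fast path below, is the
 * paravirt-aware counterpart of this sequence. Natively it expands to the
 * same swapgs + sysretq pair; under CONFIG_PARAVIRT a hypervisor may patch
 * in its own return-to-user implementation.
 */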
  200. .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
  201. #ifdef CONFIG_TRACE_IRQFLAGS
  202. bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
  203. jnc 1f
  204. TRACE_IRQS_ON
  205. 1:
  206. #endif
  207. .endm
  208. /*
  209. * C code is not supposed to know about the undefined top of stack. Every time
  210. * a C function with a pt_regs argument is called from the SYSCALL-based
  211. * fast path, FIXUP_TOP_OF_STACK is needed.
  212. * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
  213. * manipulation.
  214. */
  215. /* %rsp:at FRAMEEND */
  216. .macro FIXUP_TOP_OF_STACK tmp
  217. movq %gs:pda_oldrsp,\tmp
  218. movq \tmp,RSP(%rsp)
  219. movq $__USER_DS,SS(%rsp)
  220. movq $__USER_CS,CS(%rsp)
  221. movq $-1,RCX(%rsp)
  222. movq R11(%rsp),\tmp /* get eflags */
  223. movq \tmp,EFLAGS(%rsp)
  224. .endm
  225. .macro RESTORE_TOP_OF_STACK tmp,offset=0
  226. movq RSP-\offset(%rsp),\tmp
  227. movq \tmp,%gs:pda_oldrsp
  228. movq EFLAGS-\offset(%rsp),\tmp
  229. movq \tmp,R11-\offset(%rsp)
  230. .endm
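/*
 * Illustrative usage sketch (added), mirroring ptregscall_common further
 * down. A stub that hands a full pt_regs to C from the SYSCALL fast path
 * typically does:
 *
 *	SAVE_REST
 *	FIXUP_TOP_OF_STACK %r11		# make RIP/CS/EFLAGS/RSP/SS valid
 *	call	some_c_function		# (hypothetical) may touch pt_regs
 *	RESTORE_TOP_OF_STACK %r11	# sync changes back for SYSRET
 *	RESTORE_REST
 */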
  231. .macro FAKE_STACK_FRAME child_rip
  232. /* push in order ss, rsp, eflags, cs, rip */
  233. xorl %eax, %eax
  234. pushq $__KERNEL_DS /* ss */
  235. CFI_ADJUST_CFA_OFFSET 8
  236. /*CFI_REL_OFFSET ss,0*/
  237. pushq %rax /* rsp */
  238. CFI_ADJUST_CFA_OFFSET 8
  239. CFI_REL_OFFSET rsp,0
  240. pushq $(1<<9) /* eflags - interrupts on */
  241. CFI_ADJUST_CFA_OFFSET 8
  242. /*CFI_REL_OFFSET rflags,0*/
  243. pushq $__KERNEL_CS /* cs */
  244. CFI_ADJUST_CFA_OFFSET 8
  245. /*CFI_REL_OFFSET cs,0*/
  246. pushq \child_rip /* rip */
  247. CFI_ADJUST_CFA_OFFSET 8
  248. CFI_REL_OFFSET rip,0
  249. pushq %rax /* orig rax */
  250. CFI_ADJUST_CFA_OFFSET 8
  251. .endm
  252. .macro UNFAKE_STACK_FRAME
  253. addq $8*6, %rsp
  254. CFI_ADJUST_CFA_OFFSET -(6*8)
  255. .endm
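/* Note (added): the 6*8 above undoes the six pushes done by FAKE_STACK_FRAME
   (ss, rsp, eflags, cs, rip and orig rax). */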
  256. .macro CFI_DEFAULT_STACK start=1
  257. .if \start
  258. CFI_STARTPROC simple
  259. CFI_SIGNAL_FRAME
  260. CFI_DEF_CFA rsp,SS+8
  261. .else
  262. CFI_DEF_CFA_OFFSET SS+8
  263. .endif
  264. CFI_REL_OFFSET r15,R15
  265. CFI_REL_OFFSET r14,R14
  266. CFI_REL_OFFSET r13,R13
  267. CFI_REL_OFFSET r12,R12
  268. CFI_REL_OFFSET rbp,RBP
  269. CFI_REL_OFFSET rbx,RBX
  270. CFI_REL_OFFSET r11,R11
  271. CFI_REL_OFFSET r10,R10
  272. CFI_REL_OFFSET r9,R9
  273. CFI_REL_OFFSET r8,R8
  274. CFI_REL_OFFSET rax,RAX
  275. CFI_REL_OFFSET rcx,RCX
  276. CFI_REL_OFFSET rdx,RDX
  277. CFI_REL_OFFSET rsi,RSI
  278. CFI_REL_OFFSET rdi,RDI
  279. CFI_REL_OFFSET rip,RIP
  280. /*CFI_REL_OFFSET cs,CS*/
  281. /*CFI_REL_OFFSET rflags,EFLAGS*/
  282. CFI_REL_OFFSET rsp,RSP
  283. /*CFI_REL_OFFSET ss,SS*/
  284. .endm
  285. /*
  286. * A newly forked process directly context switches into this.
  287. */
  288. /* rdi: prev */
  289. ENTRY(ret_from_fork)
  290. CFI_DEFAULT_STACK
  291. push kernel_eflags(%rip)
  292. CFI_ADJUST_CFA_OFFSET 8
  293. popf # reset kernel eflags
  294. CFI_ADJUST_CFA_OFFSET -8
  295. call schedule_tail
  296. GET_THREAD_INFO(%rcx)
  297. testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
  298. jnz rff_trace
  299. rff_action:
  300. RESTORE_REST
  301. testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
  302. je int_ret_from_sys_call
  303. testl $_TIF_IA32,TI_flags(%rcx)
  304. jnz int_ret_from_sys_call
  305. RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
  306. jmp ret_from_sys_call
  307. rff_trace:
  308. movq %rsp,%rdi
  309. call syscall_trace_leave
  310. GET_THREAD_INFO(%rcx)
  311. jmp rff_action
  312. CFI_ENDPROC
  313. END(ret_from_fork)
  314. /*
  315. * System call entry. Up to 6 arguments in registers are supported.
  316. *
  317. * SYSCALL does not save anything on the stack and does not change the
  318. * stack pointer.
  319. */
  320. /*
  321. * Register setup:
  322. * rax system call number
  323. * rdi arg0
  324. * rcx return address for syscall/sysret, C arg3
  325. * rsi arg1
  326. * rdx arg2
  327. * r10 arg3 (--> moved to rcx for C)
  328. * r8 arg4
  329. * r9 arg5
  330. * r11 eflags for syscall/sysret, temporary for C
  331. * r12-r15,rbp,rbx saved by C code, not touched.
  332. *
  333. * Interrupts are off on entry.
  334. * Only called from user space.
  335. *
  336. * XXX if we had a free scratch register we could save the RSP into the stack frame
  337. * and report it properly in ps. Unfortunately we don't have one.
  338. *
  339. * When the user can change the frame, always force IRET. That is because
  340. * IRET deals with non-canonical addresses better. SYSRET has trouble
  341. * with them due to bugs in both AMD and Intel CPUs.
  342. */
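/*
 * Illustrative user-space counterpart (added, not part of the original file;
 * guarded by "#if 0" so it is never assembled here): a minimal program using
 * the register convention documented above from the caller's side. Note that
 * a 4th syscall argument would go in %r10, not %rcx, and that the CPU
 * clobbers %rcx and %r11 on SYSCALL, which is exactly why this entry code
 * stashes the return RIP and eflags from them.
 */
#if 0
	.section .rodata
msg:	.ascii	"hello via SYSCALL\n"
msg_len = . - msg

	.text
	.globl _start
_start:
	movl	$1, %eax		# __NR_write
	movl	$1, %edi		# fd = stdout
	leaq	msg(%rip), %rsi		# buf
	movl	$msg_len, %edx		# count
	syscall				# %rcx/%r11 clobbered by the CPU
	movl	$60, %eax		# __NR_exit
	xorl	%edi, %edi		# status = 0
	syscall
#endif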
  343. ENTRY(system_call)
  344. CFI_STARTPROC simple
  345. CFI_SIGNAL_FRAME
  346. CFI_DEF_CFA rsp,PDA_STACKOFFSET
  347. CFI_REGISTER rip,rcx
  348. /*CFI_REGISTER rflags,r11*/
  349. SWAPGS_UNSAFE_STACK
  350. /*
  351. * A hypervisor implementation might want to use a label
  352. * after the swapgs, so that it can do the swapgs
  353. * for the guest and jump here on syscall.
  354. */
  355. ENTRY(system_call_after_swapgs)
  356. movq %rsp,%gs:pda_oldrsp
  357. movq %gs:pda_kernelstack,%rsp
  358. /*
  359. * No need to follow this irqs off/on section - it's straight
  360. * and short:
  361. */
  362. ENABLE_INTERRUPTS(CLBR_NONE)
  363. SAVE_ARGS 8,1
  364. movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
  365. movq %rcx,RIP-ARGOFFSET(%rsp)
  366. CFI_REL_OFFSET rip,RIP-ARGOFFSET
  367. GET_THREAD_INFO(%rcx)
  368. testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
  369. jnz tracesys
  370. system_call_fastpath:
  371. cmpq $__NR_syscall_max,%rax
  372. ja badsys
  373. movq %r10,%rcx
  374. call *sys_call_table(,%rax,8) # XXX: rip relative
  375. movq %rax,RAX-ARGOFFSET(%rsp)
  376. /*
  377. * Syscall return path ending with SYSRET (fast path)
  378. * Has incomplete stack frame and undefined top of stack.
  379. */
  380. ret_from_sys_call:
  381. movl $_TIF_ALLWORK_MASK,%edi
  382. /* edi: flagmask */
  383. sysret_check:
  384. LOCKDEP_SYS_EXIT
  385. GET_THREAD_INFO(%rcx)
  386. DISABLE_INTERRUPTS(CLBR_NONE)
  387. TRACE_IRQS_OFF
  388. movl TI_flags(%rcx),%edx
  389. andl %edi,%edx
  390. jnz sysret_careful
  391. CFI_REMEMBER_STATE
  392. /*
  393. * sysretq will re-enable interrupts:
  394. */
  395. TRACE_IRQS_ON
  396. movq RIP-ARGOFFSET(%rsp),%rcx
  397. CFI_REGISTER rip,rcx
  398. RESTORE_ARGS 0,-ARG_SKIP,1
  399. /*CFI_REGISTER rflags,r11*/
  400. movq %gs:pda_oldrsp, %rsp
  401. USERGS_SYSRET64
  402. CFI_RESTORE_STATE
  403. /* Handle reschedules */
  404. /* edx: work, edi: workmask */
  405. sysret_careful:
  406. bt $TIF_NEED_RESCHED,%edx
  407. jnc sysret_signal
  408. TRACE_IRQS_ON
  409. ENABLE_INTERRUPTS(CLBR_NONE)
  410. pushq %rdi
  411. CFI_ADJUST_CFA_OFFSET 8
  412. call schedule
  413. popq %rdi
  414. CFI_ADJUST_CFA_OFFSET -8
  415. jmp sysret_check
  416. /* Handle a signal */
  417. sysret_signal:
  418. TRACE_IRQS_ON
  419. ENABLE_INTERRUPTS(CLBR_NONE)
  420. #ifdef CONFIG_AUDITSYSCALL
  421. bt $TIF_SYSCALL_AUDIT,%edx
  422. jc sysret_audit
  423. #endif
  424. /* edx: work flags (arg3) */
  425. leaq do_notify_resume(%rip),%rax
  426. leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
  427. xorl %esi,%esi # oldset -> arg2
  428. call ptregscall_common
  429. movl $_TIF_WORK_MASK,%edi
  430. /* Use IRET because user could have changed frame. This
  431. works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
  432. DISABLE_INTERRUPTS(CLBR_NONE)
  433. TRACE_IRQS_OFF
  434. jmp int_with_check
  435. badsys:
  436. movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
  437. jmp ret_from_sys_call
  438. #ifdef CONFIG_AUDITSYSCALL
  439. /*
  440. * Fast path for syscall audit without full syscall trace.
  441. * We just call audit_syscall_entry() directly, and then
  442. * jump back to the normal fast path.
  443. */
  444. auditsys:
  445. movq %r10,%r9 /* 6th arg: 4th syscall arg */
  446. movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
  447. movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
  448. movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
  449. movq %rax,%rsi /* 2nd arg: syscall number */
  450. movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
  451. call audit_syscall_entry
  452. LOAD_ARGS 0 /* reload call-clobbered registers */
  453. jmp system_call_fastpath
  454. /*
  455. * Return fast path for syscall audit. Call audit_syscall_exit()
  456. * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
  457. * masked off.
  458. */
  459. sysret_audit:
  460. movq %rax,%rsi /* second arg, syscall return value */
  461. cmpq $0,%rax /* is it < 0? */
  462. setl %al /* 1 if so, 0 if not */
  463. movzbl %al,%edi /* zero-extend that into %edi */
  464. inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
  465. call audit_syscall_exit
  466. movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
  467. jmp sysret_check
  468. #endif /* CONFIG_AUDITSYSCALL */
  469. /* Do syscall tracing */
  470. tracesys:
  471. #ifdef CONFIG_AUDITSYSCALL
  472. testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
  473. jz auditsys
  474. #endif
  475. SAVE_REST
  476. movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
  477. FIXUP_TOP_OF_STACK %rdi
  478. movq %rsp,%rdi
  479. call syscall_trace_enter
  480. /*
  481. * Reload arg registers from stack in case ptrace changed them.
  482. * We don't reload %rax because syscall_trace_enter() returned
  483. * the value it wants us to use in the table lookup.
  484. */
  485. LOAD_ARGS ARGOFFSET, 1
  486. RESTORE_REST
  487. cmpq $__NR_syscall_max,%rax
  488. ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
  489. movq %r10,%rcx /* fixup for C */
  490. call *sys_call_table(,%rax,8)
  491. movq %rax,RAX-ARGOFFSET(%rsp)
  492. /* Use IRET because user could have changed frame */
  493. /*
  494. * Syscall return path ending with IRET.
  495. * Has correct top of stack, but partial stack frame.
  496. */
  497. .globl int_ret_from_sys_call
  498. .globl int_with_check
  499. int_ret_from_sys_call:
  500. DISABLE_INTERRUPTS(CLBR_NONE)
  501. TRACE_IRQS_OFF
  502. testl $3,CS-ARGOFFSET(%rsp)
  503. je retint_restore_args
  504. movl $_TIF_ALLWORK_MASK,%edi
  505. /* edi: mask to check */
  506. int_with_check:
  507. LOCKDEP_SYS_EXIT_IRQ
  508. GET_THREAD_INFO(%rcx)
  509. movl TI_flags(%rcx),%edx
  510. andl %edi,%edx
  511. jnz int_careful
  512. andl $~TS_COMPAT,TI_status(%rcx)
  513. jmp retint_swapgs
  514. /* Either reschedule or signal or syscall exit tracking needed. */
  515. /* First do a reschedule test. */
  516. /* edx: work, edi: workmask */
  517. int_careful:
  518. bt $TIF_NEED_RESCHED,%edx
  519. jnc int_very_careful
  520. TRACE_IRQS_ON
  521. ENABLE_INTERRUPTS(CLBR_NONE)
  522. pushq %rdi
  523. CFI_ADJUST_CFA_OFFSET 8
  524. call schedule
  525. popq %rdi
  526. CFI_ADJUST_CFA_OFFSET -8
  527. DISABLE_INTERRUPTS(CLBR_NONE)
  528. TRACE_IRQS_OFF
  529. jmp int_with_check
  530. /* handle signals and tracing -- both require a full stack frame */
  531. int_very_careful:
  532. TRACE_IRQS_ON
  533. ENABLE_INTERRUPTS(CLBR_NONE)
  534. SAVE_REST
  535. /* Check for syscall exit trace */
  536. testl $_TIF_WORK_SYSCALL_EXIT,%edx
  537. jz int_signal
  538. pushq %rdi
  539. CFI_ADJUST_CFA_OFFSET 8
  540. leaq 8(%rsp),%rdi # &ptregs -> arg1
  541. call syscall_trace_leave
  542. popq %rdi
  543. CFI_ADJUST_CFA_OFFSET -8
  544. andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
  545. jmp int_restore_rest
  546. int_signal:
  547. testl $_TIF_DO_NOTIFY_MASK,%edx
  548. jz 1f
  549. movq %rsp,%rdi # &ptregs -> arg1
  550. xorl %esi,%esi # oldset -> arg2
  551. call do_notify_resume
  552. 1: movl $_TIF_WORK_MASK,%edi
  553. int_restore_rest:
  554. RESTORE_REST
  555. DISABLE_INTERRUPTS(CLBR_NONE)
  556. TRACE_IRQS_OFF
  557. jmp int_with_check
  558. CFI_ENDPROC
  559. END(system_call)
  560. /*
  561. * Certain special system calls that need to save a complete full stack frame.
  562. */
  563. .macro PTREGSCALL label,func,arg
  564. .globl \label
  565. \label:
  566. leaq \func(%rip),%rax
  567. leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
  568. jmp ptregscall_common
  569. END(\label)
  570. .endm
  571. CFI_STARTPROC
  572. PTREGSCALL stub_clone, sys_clone, %r8
  573. PTREGSCALL stub_fork, sys_fork, %rdi
  574. PTREGSCALL stub_vfork, sys_vfork, %rdi
  575. PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
  576. PTREGSCALL stub_iopl, sys_iopl, %rsi
  577. ENTRY(ptregscall_common)
  578. popq %r11
  579. CFI_ADJUST_CFA_OFFSET -8
  580. CFI_REGISTER rip, r11
  581. SAVE_REST
  582. movq %r11, %r15
  583. CFI_REGISTER rip, r15
  584. FIXUP_TOP_OF_STACK %r11
  585. call *%rax
  586. RESTORE_TOP_OF_STACK %r11
  587. movq %r15, %r11
  588. CFI_REGISTER rip, r11
  589. RESTORE_REST
  590. pushq %r11
  591. CFI_ADJUST_CFA_OFFSET 8
  592. CFI_REL_OFFSET rip, 0
  593. ret
  594. CFI_ENDPROC
  595. END(ptregscall_common)
  596. ENTRY(stub_execve)
  597. CFI_STARTPROC
  598. popq %r11
  599. CFI_ADJUST_CFA_OFFSET -8
  600. CFI_REGISTER rip, r11
  601. SAVE_REST
  602. FIXUP_TOP_OF_STACK %r11
  603. movq %rsp, %rcx
  604. call sys_execve
  605. RESTORE_TOP_OF_STACK %r11
  606. movq %rax,RAX(%rsp)
  607. RESTORE_REST
  608. jmp int_ret_from_sys_call
  609. CFI_ENDPROC
  610. END(stub_execve)
  611. /*
  612. * sigreturn is special because it needs to restore all registers on return.
  613. * This cannot be done with SYSRET, so use the IRET return path instead.
  614. */
  615. ENTRY(stub_rt_sigreturn)
  616. CFI_STARTPROC
  617. addq $8, %rsp
  618. CFI_ADJUST_CFA_OFFSET -8
  619. SAVE_REST
  620. movq %rsp,%rdi
  621. FIXUP_TOP_OF_STACK %r11
  622. call sys_rt_sigreturn
  623. movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
  624. RESTORE_REST
  625. jmp int_ret_from_sys_call
  626. CFI_ENDPROC
  627. END(stub_rt_sigreturn)
  628. /*
  629. * initial frame state for interrupts and exceptions
  630. */
  631. .macro _frame ref
  632. CFI_STARTPROC simple
  633. CFI_SIGNAL_FRAME
  634. CFI_DEF_CFA rsp,SS+8-\ref
  635. /*CFI_REL_OFFSET ss,SS-\ref*/
  636. CFI_REL_OFFSET rsp,RSP-\ref
  637. /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
  638. /*CFI_REL_OFFSET cs,CS-\ref*/
  639. CFI_REL_OFFSET rip,RIP-\ref
  640. .endm
  641. /* initial frame state for interrupts (and exceptions without error code) */
  642. #define INTR_FRAME _frame RIP
  643. /* initial frame state for exceptions with error code (and interrupts with
  644. vector already pushed) */
  645. #define XCPT_FRAME _frame ORIG_RAX
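/*
 * Note (added): the only difference is the reference offset handed to _frame.
 * INTR_FRAME describes a stack holding just the hardware frame (RIP on top),
 * while XCPT_FRAME accounts for one extra word already pushed on top of it:
 * the CPU's error code, or the vector word pushed before jumping to
 * common_interrupt; hence the ORIG_RAX reference point.
 */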
  646. /*
  647. * Interrupt entry/exit.
  648. *
  649. * Interrupt entry points save only callee clobbered registers in fast path.
  650. *
  651. * Entry runs with interrupts off.
  652. */
  653. /* 0(%rsp): interrupt number */
  654. .macro interrupt func
  655. cld
  656. SAVE_ARGS
  657. leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
  658. pushq %rbp
  659. /*
  660. * Save rbp twice: once to mark the stack frame, as usual, and once to
  661. * fill pt_regs properly. This is because bx comes right
  662. * before the last saved register in that structure, and not bp. If the
  663. * base pointer were in the place bx is today, this would not be needed.
  664. */
  665. movq %rbp, -8(%rsp)
  666. CFI_ADJUST_CFA_OFFSET 8
  667. CFI_REL_OFFSET rbp, 0
  668. movq %rsp,%rbp
  669. CFI_DEF_CFA_REGISTER rbp
  670. testl $3,CS(%rdi)
  671. je 1f
  672. SWAPGS
  673. /* irqcount is used to check if a CPU is already on an interrupt
  674. stack or not. While this is essentially redundant with preempt_count
  675. it is a little cheaper to use a separate counter in the PDA
  676. (short of moving irq_enter into assembly, which would be too
  677. much work) */
  678. 1: incl %gs:pda_irqcount
  679. cmoveq %gs:pda_irqstackptr,%rsp
  680. push %rbp # backlink for old unwinder
  681. /*
  682. * We entered an interrupt context - irqs are off:
  683. */
  684. TRACE_IRQS_OFF
  685. call \func
  686. .endm
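/*
 * Note (added): pda_irqcount starts at -1, so the incl above sets ZF only for
 * the outermost interrupt. The cmoveq therefore switches %rsp to the per-CPU
 * interrupt stack only on the first nesting level; nested interrupts keep
 * running on the stack they arrived on.
 */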
  687. ENTRY(common_interrupt)
  688. XCPT_FRAME
  689. interrupt do_IRQ
  690. /* 0(%rsp): oldrsp-ARGOFFSET */
  691. ret_from_intr:
  692. DISABLE_INTERRUPTS(CLBR_NONE)
  693. TRACE_IRQS_OFF
  694. decl %gs:pda_irqcount
  695. leaveq
  696. CFI_DEF_CFA_REGISTER rsp
  697. CFI_ADJUST_CFA_OFFSET -8
  698. exit_intr:
  699. GET_THREAD_INFO(%rcx)
  700. testl $3,CS-ARGOFFSET(%rsp)
  701. je retint_kernel
  702. /* Interrupt came from user space */
  703. /*
  704. * Has a correct top of stack, but a partial stack frame
  705. * %rcx: thread info. Interrupts off.
  706. */
  707. retint_with_reschedule:
  708. movl $_TIF_WORK_MASK,%edi
  709. retint_check:
  710. LOCKDEP_SYS_EXIT_IRQ
  711. movl TI_flags(%rcx),%edx
  712. andl %edi,%edx
  713. CFI_REMEMBER_STATE
  714. jnz retint_careful
  715. retint_swapgs: /* return to user-space */
  716. /*
  717. * The iretq could re-enable interrupts:
  718. */
  719. DISABLE_INTERRUPTS(CLBR_ANY)
  720. TRACE_IRQS_IRETQ
  721. SWAPGS
  722. jmp restore_args
  723. retint_restore_args: /* return to kernel space */
  724. DISABLE_INTERRUPTS(CLBR_ANY)
  725. /*
  726. * The iretq could re-enable interrupts:
  727. */
  728. TRACE_IRQS_IRETQ
  729. restore_args:
  730. RESTORE_ARGS 0,8,0
  731. irq_return:
  732. INTERRUPT_RETURN
  733. .section __ex_table, "a"
  734. .quad irq_return, bad_iret
  735. .previous
  736. #ifdef CONFIG_PARAVIRT
  737. ENTRY(native_iret)
  738. iretq
  739. .section __ex_table,"a"
  740. .quad native_iret, bad_iret
  741. .previous
  742. #endif
  743. .section .fixup,"ax"
  744. bad_iret:
  745. /*
  746. * The iret traps when the %cs or %ss being restored is bogus.
  747. * We've lost the original trap vector and error code.
  748. * #GPF is the most likely one to get for an invalid selector.
  749. * So pretend we completed the iret and took the #GPF in user mode.
  750. *
  751. * We are now running with the kernel GS after exception recovery.
  752. * But error_entry expects us to have user GS to match the user %cs,
  753. * so swap back.
  754. */
  755. pushq $0
  756. SWAPGS
  757. jmp general_protection
  758. .previous
  759. /* edi: workmask, edx: work */
  760. retint_careful:
  761. CFI_RESTORE_STATE
  762. bt $TIF_NEED_RESCHED,%edx
  763. jnc retint_signal
  764. TRACE_IRQS_ON
  765. ENABLE_INTERRUPTS(CLBR_NONE)
  766. pushq %rdi
  767. CFI_ADJUST_CFA_OFFSET 8
  768. call schedule
  769. popq %rdi
  770. CFI_ADJUST_CFA_OFFSET -8
  771. GET_THREAD_INFO(%rcx)
  772. DISABLE_INTERRUPTS(CLBR_NONE)
  773. TRACE_IRQS_OFF
  774. jmp retint_check
  775. retint_signal:
  776. testl $_TIF_DO_NOTIFY_MASK,%edx
  777. jz retint_swapgs
  778. TRACE_IRQS_ON
  779. ENABLE_INTERRUPTS(CLBR_NONE)
  780. SAVE_REST
  781. movq $-1,ORIG_RAX(%rsp)
  782. xorl %esi,%esi # oldset
  783. movq %rsp,%rdi # &pt_regs
  784. call do_notify_resume
  785. RESTORE_REST
  786. DISABLE_INTERRUPTS(CLBR_NONE)
  787. TRACE_IRQS_OFF
  788. GET_THREAD_INFO(%rcx)
  789. jmp retint_with_reschedule
  790. #ifdef CONFIG_PREEMPT
  791. /* Returning to kernel space. Check if we need preemption */
  792. /* rcx: threadinfo. interrupts off. */
  793. ENTRY(retint_kernel)
  794. cmpl $0,TI_preempt_count(%rcx)
  795. jnz retint_restore_args
  796. bt $TIF_NEED_RESCHED,TI_flags(%rcx)
  797. jnc retint_restore_args
  798. bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
  799. jnc retint_restore_args
  800. call preempt_schedule_irq
  801. jmp exit_intr
  802. #endif
  803. CFI_ENDPROC
  804. END(common_interrupt)
  805. /*
  806. * APIC interrupts.
  807. */
  808. .macro apicinterrupt num,func
  809. INTR_FRAME
  810. pushq $~(\num)
  811. CFI_ADJUST_CFA_OFFSET 8
  812. interrupt \func
  813. jmp ret_from_intr
  814. CFI_ENDPROC
  815. .endm
  816. ENTRY(thermal_interrupt)
  817. apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
  818. END(thermal_interrupt)
  819. ENTRY(threshold_interrupt)
  820. apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
  821. END(threshold_interrupt)
  822. #ifdef CONFIG_SMP
  823. ENTRY(reschedule_interrupt)
  824. apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
  825. END(reschedule_interrupt)
  826. .macro INVALIDATE_ENTRY num
  827. ENTRY(invalidate_interrupt\num)
  828. apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
  829. END(invalidate_interrupt\num)
  830. .endm
  831. INVALIDATE_ENTRY 0
  832. INVALIDATE_ENTRY 1
  833. INVALIDATE_ENTRY 2
  834. INVALIDATE_ENTRY 3
  835. INVALIDATE_ENTRY 4
  836. INVALIDATE_ENTRY 5
  837. INVALIDATE_ENTRY 6
  838. INVALIDATE_ENTRY 7
  839. ENTRY(call_function_interrupt)
  840. apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
  841. END(call_function_interrupt)
  842. ENTRY(call_function_single_interrupt)
  843. apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
  844. END(call_function_single_interrupt)
  845. ENTRY(irq_move_cleanup_interrupt)
  846. apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
  847. END(irq_move_cleanup_interrupt)
  848. #endif
  849. ENTRY(apic_timer_interrupt)
  850. apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
  851. END(apic_timer_interrupt)
  852. ENTRY(uv_bau_message_intr1)
  853. apicinterrupt 220,uv_bau_message_interrupt
  854. END(uv_bau_message_intr1)
  855. ENTRY(error_interrupt)
  856. apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
  857. END(error_interrupt)
  858. ENTRY(spurious_interrupt)
  859. apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
  860. END(spurious_interrupt)
  861. /*
  862. * Exception entry points.
  863. */
  864. .macro zeroentry sym
  865. INTR_FRAME
  866. PARAVIRT_ADJUST_EXCEPTION_FRAME
  867. pushq $0 /* push error code/oldrax */
  868. CFI_ADJUST_CFA_OFFSET 8
  869. pushq %rax /* push real oldrax to the rdi slot */
  870. CFI_ADJUST_CFA_OFFSET 8
  871. CFI_REL_OFFSET rax,0
  872. leaq \sym(%rip),%rax
  873. jmp error_entry
  874. CFI_ENDPROC
  875. .endm
  876. .macro errorentry sym
  877. XCPT_FRAME
  878. PARAVIRT_ADJUST_EXCEPTION_FRAME
  879. pushq %rax
  880. CFI_ADJUST_CFA_OFFSET 8
  881. CFI_REL_OFFSET rax,0
  882. leaq \sym(%rip),%rax
  883. jmp error_entry
  884. CFI_ENDPROC
  885. .endm
  886. /* error code is on the stack already */
  888. /* handle NMI-like exceptions that can happen everywhere */
  888. .macro paranoidentry sym, ist=0, irqtrace=1
  889. SAVE_ALL
  890. cld
  891. movl $1,%ebx
  892. movl $MSR_GS_BASE,%ecx
  893. rdmsr
  894. testl %edx,%edx
  895. js 1f
  896. SWAPGS
  897. xorl %ebx,%ebx
  898. 1:
  899. .if \ist
  900. movq %gs:pda_data_offset, %rbp
  901. .endif
  902. .if \irqtrace
  903. TRACE_IRQS_OFF
  904. .endif
  905. movq %rsp,%rdi
  906. movq ORIG_RAX(%rsp),%rsi
  907. movq $-1,ORIG_RAX(%rsp)
  908. .if \ist
  909. subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  910. .endif
  911. call \sym
  912. .if \ist
  913. addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  914. .endif
  915. DISABLE_INTERRUPTS(CLBR_NONE)
  916. .if \irqtrace
  917. TRACE_IRQS_OFF
  918. .endif
  919. .endm
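/*
 * Note (added): paranoidentry leaves a flag in %ebx for the exit path:
 * 1 means GS was already the kernel's, so no swapgs is needed on the way
 * out; 0 means we did a SWAPGS here and must swap back before returning.
 */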
  920. /*
  921. * "Paranoid" exit path from exception stack.
  922. * Paranoid because this is used by NMIs and cannot take
  923. * any kernel state for granted.
  924. * We don't do kernel preemption checks here, because only
  925. * NMI should be common and it does not enable IRQs and
  926. * cannot get reschedule ticks.
  927. *
  928. * "trace" is 0 for the NMI handler only, because irq-tracing
  929. * is fundamentally NMI-unsafe. (we cannot change the soft and
  930. * hard flags at once, atomically)
  931. */
  932. .macro paranoidexit trace=1
  933. /* ebx: no swapgs flag */
  934. paranoid_exit\trace:
  935. testl %ebx,%ebx /* swapgs needed? */
  936. jnz paranoid_restore\trace
  937. testl $3,CS(%rsp)
  938. jnz paranoid_userspace\trace
  939. paranoid_swapgs\trace:
  940. .if \trace
  941. TRACE_IRQS_IRETQ 0
  942. .endif
  943. SWAPGS_UNSAFE_STACK
  944. paranoid_restore\trace:
  945. RESTORE_ALL 8
  946. jmp irq_return
  947. paranoid_userspace\trace:
  948. GET_THREAD_INFO(%rcx)
  949. movl TI_flags(%rcx),%ebx
  950. andl $_TIF_WORK_MASK,%ebx
  951. jz paranoid_swapgs\trace
  952. movq %rsp,%rdi /* &pt_regs */
  953. call sync_regs
  954. movq %rax,%rsp /* switch stack for scheduling */
  955. testl $_TIF_NEED_RESCHED,%ebx
  956. jnz paranoid_schedule\trace
  957. movl %ebx,%edx /* arg3: thread flags */
  958. .if \trace
  959. TRACE_IRQS_ON
  960. .endif
  961. ENABLE_INTERRUPTS(CLBR_NONE)
  962. xorl %esi,%esi /* arg2: oldset */
  963. movq %rsp,%rdi /* arg1: &pt_regs */
  964. call do_notify_resume
  965. DISABLE_INTERRUPTS(CLBR_NONE)
  966. .if \trace
  967. TRACE_IRQS_OFF
  968. .endif
  969. jmp paranoid_userspace\trace
  970. paranoid_schedule\trace:
  971. .if \trace
  972. TRACE_IRQS_ON
  973. .endif
  974. ENABLE_INTERRUPTS(CLBR_ANY)
  975. call schedule
  976. DISABLE_INTERRUPTS(CLBR_ANY)
  977. .if \trace
  978. TRACE_IRQS_OFF
  979. .endif
  980. jmp paranoid_userspace\trace
  981. CFI_ENDPROC
  982. .endm
  983. /*
  984. * Exception entry point. This expects an error code/orig_rax on the stack
  985. * and the exception handler in %rax.
  986. */
  987. KPROBE_ENTRY(error_entry)
  988. _frame RDI
  989. CFI_REL_OFFSET rax,0
  990. /* rdi slot contains rax, oldrax contains error code */
  991. cld
  992. subq $14*8,%rsp
  993. CFI_ADJUST_CFA_OFFSET (14*8)
  994. movq %rsi,13*8(%rsp)
  995. CFI_REL_OFFSET rsi,RSI
  996. movq 14*8(%rsp),%rsi /* load rax from rdi slot */
  997. CFI_REGISTER rax,rsi
  998. movq %rdx,12*8(%rsp)
  999. CFI_REL_OFFSET rdx,RDX
  1000. movq %rcx,11*8(%rsp)
  1001. CFI_REL_OFFSET rcx,RCX
  1002. movq %rsi,10*8(%rsp) /* store rax */
  1003. CFI_REL_OFFSET rax,RAX
  1004. movq %r8, 9*8(%rsp)
  1005. CFI_REL_OFFSET r8,R8
  1006. movq %r9, 8*8(%rsp)
  1007. CFI_REL_OFFSET r9,R9
  1008. movq %r10,7*8(%rsp)
  1009. CFI_REL_OFFSET r10,R10
  1010. movq %r11,6*8(%rsp)
  1011. CFI_REL_OFFSET r11,R11
  1012. movq %rbx,5*8(%rsp)
  1013. CFI_REL_OFFSET rbx,RBX
  1014. movq %rbp,4*8(%rsp)
  1015. CFI_REL_OFFSET rbp,RBP
  1016. movq %r12,3*8(%rsp)
  1017. CFI_REL_OFFSET r12,R12
  1018. movq %r13,2*8(%rsp)
  1019. CFI_REL_OFFSET r13,R13
  1020. movq %r14,1*8(%rsp)
  1021. CFI_REL_OFFSET r14,R14
  1022. movq %r15,(%rsp)
  1023. CFI_REL_OFFSET r15,R15
  1024. xorl %ebx,%ebx
  1025. testl $3,CS(%rsp)
  1026. je error_kernelspace
  1027. error_swapgs:
  1028. SWAPGS
  1029. error_sti:
  1030. TRACE_IRQS_OFF
  1031. movq %rdi,RDI(%rsp)
  1032. CFI_REL_OFFSET rdi,RDI
  1033. movq %rsp,%rdi
  1034. movq ORIG_RAX(%rsp),%rsi /* get error code */
  1035. movq $-1,ORIG_RAX(%rsp)
  1036. call *%rax
  1037. /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
  1038. error_exit:
  1039. movl %ebx,%eax
  1040. RESTORE_REST
  1041. DISABLE_INTERRUPTS(CLBR_NONE)
  1042. TRACE_IRQS_OFF
  1043. GET_THREAD_INFO(%rcx)
  1044. testl %eax,%eax
  1045. jne retint_kernel
  1046. LOCKDEP_SYS_EXIT_IRQ
  1047. movl TI_flags(%rcx),%edx
  1048. movl $_TIF_WORK_MASK,%edi
  1049. andl %edi,%edx
  1050. jnz retint_careful
  1051. jmp retint_swapgs
  1052. CFI_ENDPROC
  1053. error_kernelspace:
  1054. incl %ebx
  1055. /* There are two places in the kernel that can potentially fault with
  1056. usergs. Handle them here. The exception handlers after
  1057. iret run with kernel gs again, so don't set the user space flag.
  1058. B stepping K8s sometimes report a truncated RIP for IRET
  1059. exceptions returning to compat mode. Check for these here too. */
  1060. leaq irq_return(%rip),%rcx
  1061. cmpq %rcx,RIP(%rsp)
  1062. je error_swapgs
  1063. movl %ecx,%ecx /* zero extend */
  1064. cmpq %rcx,RIP(%rsp)
  1065. je error_swapgs
  1066. cmpq $gs_change,RIP(%rsp)
  1067. je error_swapgs
  1068. jmp error_sti
  1069. KPROBE_END(error_entry)
  1070. /* Reload gs selector with exception handling */
  1071. /* edi: new selector */
  1072. ENTRY(native_load_gs_index)
  1073. CFI_STARTPROC
  1074. pushf
  1075. CFI_ADJUST_CFA_OFFSET 8
  1076. DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
  1077. SWAPGS
  1078. gs_change:
  1079. movl %edi,%gs
  1080. 2: mfence /* workaround */
  1081. SWAPGS
  1082. popf
  1083. CFI_ADJUST_CFA_OFFSET -8
  1084. ret
  1085. CFI_ENDPROC
  1086. ENDPROC(native_load_gs_index)
  1087. .section __ex_table,"a"
  1088. .align 8
  1089. .quad gs_change,bad_gs
  1090. .previous
  1091. .section .fixup,"ax"
  1092. /* running with kernelgs */
  1093. bad_gs:
  1094. SWAPGS /* switch back to user gs */
  1095. xorl %eax,%eax
  1096. movl %eax,%gs
  1097. jmp 2b
  1098. .previous
  1099. /*
  1100. * Create a kernel thread.
  1101. *
  1102. * C extern interface:
  1103. * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
  1104. *
  1105. * asm input arguments:
  1106. * rdi: fn, rsi: arg, rdx: flags
  1107. */
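/*
 * Illustrative call site (added): a typical in-kernel user would do e.g.
 *
 *	kernel_thread(my_thread_fn, NULL, CLONE_FS | CLONE_FILES | SIGCHLD);
 *
 * where my_thread_fn is a hypothetical int (*)(void *). The fake stack frame
 * built below makes the new thread start in child_rip, which calls fn(arg)
 * and then do_exit() with its return value.
 */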
  1108. ENTRY(kernel_thread)
  1109. CFI_STARTPROC
  1110. FAKE_STACK_FRAME $child_rip
  1111. SAVE_ALL
  1112. # rdi: flags, rsi: usp, rdx: will be &pt_regs
  1113. movq %rdx,%rdi
  1114. orq kernel_thread_flags(%rip),%rdi
  1115. movq $-1, %rsi
  1116. movq %rsp, %rdx
  1117. xorl %r8d,%r8d
  1118. xorl %r9d,%r9d
  1119. # clone now
  1120. call do_fork
  1121. movq %rax,RAX(%rsp)
  1122. xorl %edi,%edi
  1123. /*
  1124. * It isn't worth checking for a reschedule here,
  1125. * so internally to the x86_64 port you can rely on kernel_thread()
  1126. * not to reschedule the child before returning; this avoids the need
  1127. * for hacks, for example to fork off the per-CPU idle tasks.
  1128. * [Hopefully no generic code relies on the reschedule -AK]
  1129. */
  1130. RESTORE_ALL
  1131. UNFAKE_STACK_FRAME
  1132. ret
  1133. CFI_ENDPROC
  1134. ENDPROC(kernel_thread)
  1135. child_rip:
  1136. pushq $0 # fake return address
  1137. CFI_STARTPROC
  1138. /*
  1139. * Here we are in the child and the registers are set as they were
  1140. * at kernel_thread() invocation in the parent.
  1141. */
  1142. movq %rdi, %rax
  1143. movq %rsi, %rdi
  1144. call *%rax
  1145. # exit
  1146. mov %eax, %edi
  1147. call do_exit
  1148. CFI_ENDPROC
  1149. ENDPROC(child_rip)
  1150. /*
  1151. * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
  1152. *
  1153. * C extern interface:
  1154. * extern long execve(char *name, char **argv, char **envp)
  1155. *
  1156. * asm input arguments:
  1157. * rdi: name, rsi: argv, rdx: envp
  1158. *
  1159. * We want to fall back into:
  1160. * extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs *regs)
  1161. *
  1162. * do_sys_execve asm fallback arguments:
  1163. * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
  1164. */
  1165. ENTRY(kernel_execve)
  1166. CFI_STARTPROC
  1167. FAKE_STACK_FRAME $0
  1168. SAVE_ALL
  1169. movq %rsp,%rcx
  1170. call sys_execve
  1171. movq %rax, RAX(%rsp)
  1172. RESTORE_REST
  1173. testq %rax,%rax
  1174. je int_ret_from_sys_call
  1175. RESTORE_ARGS
  1176. UNFAKE_STACK_FRAME
  1177. ret
  1178. CFI_ENDPROC
  1179. ENDPROC(kernel_execve)
  1180. KPROBE_ENTRY(page_fault)
  1181. errorentry do_page_fault
  1182. KPROBE_END(page_fault)
  1183. ENTRY(coprocessor_error)
  1184. zeroentry do_coprocessor_error
  1185. END(coprocessor_error)
  1186. ENTRY(simd_coprocessor_error)
  1187. zeroentry do_simd_coprocessor_error
  1188. END(simd_coprocessor_error)
  1189. ENTRY(device_not_available)
  1190. zeroentry do_device_not_available
  1191. END(device_not_available)
  1192. /* runs on exception stack */
  1193. KPROBE_ENTRY(debug)
  1194. INTR_FRAME
  1195. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1196. pushq $0
  1197. CFI_ADJUST_CFA_OFFSET 8
  1198. paranoidentry do_debug, DEBUG_STACK
  1199. paranoidexit
  1200. KPROBE_END(debug)
  1201. /* runs on exception stack */
  1202. KPROBE_ENTRY(nmi)
  1203. INTR_FRAME
  1204. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1205. pushq $-1
  1206. CFI_ADJUST_CFA_OFFSET 8
  1207. paranoidentry do_nmi, 0, 0
  1208. #ifdef CONFIG_TRACE_IRQFLAGS
  1209. paranoidexit 0
  1210. #else
  1211. jmp paranoid_exit1
  1212. CFI_ENDPROC
  1213. #endif
  1214. KPROBE_END(nmi)
  1215. KPROBE_ENTRY(int3)
  1216. INTR_FRAME
  1217. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1218. pushq $0
  1219. CFI_ADJUST_CFA_OFFSET 8
  1220. paranoidentry do_int3, DEBUG_STACK
  1221. jmp paranoid_exit1
  1222. CFI_ENDPROC
  1223. KPROBE_END(int3)
  1224. ENTRY(overflow)
  1225. zeroentry do_overflow
  1226. END(overflow)
  1227. ENTRY(bounds)
  1228. zeroentry do_bounds
  1229. END(bounds)
  1230. ENTRY(invalid_op)
  1231. zeroentry do_invalid_op
  1232. END(invalid_op)
  1233. ENTRY(coprocessor_segment_overrun)
  1234. zeroentry do_coprocessor_segment_overrun
  1235. END(coprocessor_segment_overrun)
  1236. /* runs on exception stack */
  1237. ENTRY(double_fault)
  1238. XCPT_FRAME
  1239. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1240. paranoidentry do_double_fault
  1241. jmp paranoid_exit1
  1242. CFI_ENDPROC
  1243. END(double_fault)
  1244. ENTRY(invalid_TSS)
  1245. errorentry do_invalid_TSS
  1246. END(invalid_TSS)
  1247. ENTRY(segment_not_present)
  1248. errorentry do_segment_not_present
  1249. END(segment_not_present)
  1250. /* runs on exception stack */
  1251. ENTRY(stack_segment)
  1252. XCPT_FRAME
  1253. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1254. paranoidentry do_stack_segment
  1255. jmp paranoid_exit1
  1256. CFI_ENDPROC
  1257. END(stack_segment)
  1258. KPROBE_ENTRY(general_protection)
  1259. errorentry do_general_protection
  1260. KPROBE_END(general_protection)
  1261. ENTRY(alignment_check)
  1262. errorentry do_alignment_check
  1263. END(alignment_check)
  1264. ENTRY(divide_error)
  1265. zeroentry do_divide_error
  1266. END(divide_error)
  1267. ENTRY(spurious_interrupt_bug)
  1268. zeroentry do_spurious_interrupt_bug
  1269. END(spurious_interrupt_bug)
  1270. #ifdef CONFIG_X86_MCE
  1271. /* runs on exception stack */
  1272. ENTRY(machine_check)
  1273. INTR_FRAME
  1274. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1275. pushq $0
  1276. CFI_ADJUST_CFA_OFFSET 8
  1277. paranoidentry do_machine_check
  1278. jmp paranoid_exit1
  1279. CFI_ENDPROC
  1280. END(machine_check)
  1281. #endif
  1282. /* Call softirq on interrupt stack. Interrupts are off. */
  1283. ENTRY(call_softirq)
  1284. CFI_STARTPROC
  1285. push %rbp
  1286. CFI_ADJUST_CFA_OFFSET 8
  1287. CFI_REL_OFFSET rbp,0
  1288. mov %rsp,%rbp
  1289. CFI_DEF_CFA_REGISTER rbp
  1290. incl %gs:pda_irqcount
  1291. cmove %gs:pda_irqstackptr,%rsp
  1292. push %rbp # backlink for old unwinder
  1293. call __do_softirq
  1294. leaveq
  1295. CFI_DEF_CFA_REGISTER rsp
  1296. CFI_ADJUST_CFA_OFFSET -8
  1297. decl %gs:pda_irqcount
  1298. ret
  1299. CFI_ENDPROC
  1300. ENDPROC(call_softirq)
  1301. KPROBE_ENTRY(ignore_sysret)
  1302. CFI_STARTPROC
  1303. mov $-ENOSYS,%eax
  1304. sysret
  1305. CFI_ENDPROC
  1306. ENDPROC(ignore_sysret)
  1307. #ifdef CONFIG_XEN
  1308. ENTRY(xen_hypervisor_callback)
  1309. zeroentry xen_do_hypervisor_callback
  1310. END(xen_hypervisor_callback)
  1311. /*
  1312. # A note on the "critical region" in our callback handler.
  1313. # We want to avoid stacking callback handlers due to events occurring
  1314. # during handling of the last event. To do this, we keep events disabled
  1315. # until we've done all processing. HOWEVER, we must enable events before
  1316. # popping the stack frame (can't be done atomically) and so it would still
  1317. # be possible to get enough handler activations to overflow the stack.
  1318. # Although unlikely, bugs of that kind are hard to track down, so we'd
  1319. # like to avoid the possibility.
  1320. # So, on entry to the handler we detect whether we interrupted an
  1321. # existing activation in its critical region -- if so, we pop the current
  1322. # activation and restart the handler using the previous one.
  1323. */
  1324. ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
  1325. CFI_STARTPROC
  1326. /* Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
  1327. see the correct pointer to the pt_regs */
  1328. movq %rdi, %rsp # we don't return, adjust the stack frame
  1329. CFI_ENDPROC
  1330. CFI_DEFAULT_STACK
  1331. 11: incl %gs:pda_irqcount
  1332. movq %rsp,%rbp
  1333. CFI_DEF_CFA_REGISTER rbp
  1334. cmovzq %gs:pda_irqstackptr,%rsp
  1335. pushq %rbp # backlink for old unwinder
  1336. call xen_evtchn_do_upcall
  1337. popq %rsp
  1338. CFI_DEF_CFA_REGISTER rsp
  1339. decl %gs:pda_irqcount
  1340. jmp error_exit
  1341. CFI_ENDPROC
  1342. END(do_hypervisor_callback)
  1343. /*
  1344. # Hypervisor uses this for application faults while it executes.
  1345. # We get here for two reasons:
  1346. # 1. Fault while reloading DS, ES, FS or GS
  1347. # 2. Fault while executing IRET
  1348. # Category 1 we do not need to fix up as Xen has already reloaded all segment
  1349. # registers that could be reloaded and zeroed the others.
  1350. # Category 2 we fix up by killing the current process. We cannot use the
  1351. # normal Linux return path in this case because if we use the IRET hypercall
  1352. # to pop the stack frame we end up in an infinite loop of failsafe callbacks.
  1353. # We distinguish between categories by comparing each saved segment register
  1354. # with its current contents: any discrepancy means we are in category 1.
  1355. */
  1356. ENTRY(xen_failsafe_callback)
  1357. framesz = (RIP-0x30) /* workaround buggy gas */
  1358. _frame framesz
  1359. CFI_REL_OFFSET rcx, 0
  1360. CFI_REL_OFFSET r11, 8
  1361. movw %ds,%cx
  1362. cmpw %cx,0x10(%rsp)
  1363. CFI_REMEMBER_STATE
  1364. jne 1f
  1365. movw %es,%cx
  1366. cmpw %cx,0x18(%rsp)
  1367. jne 1f
  1368. movw %fs,%cx
  1369. cmpw %cx,0x20(%rsp)
  1370. jne 1f
  1371. movw %gs,%cx
  1372. cmpw %cx,0x28(%rsp)
  1373. jne 1f
  1374. /* All segments match their saved values => Category 2 (Bad IRET). */
  1375. movq (%rsp),%rcx
  1376. CFI_RESTORE rcx
  1377. movq 8(%rsp),%r11
  1378. CFI_RESTORE r11
  1379. addq $0x30,%rsp
  1380. CFI_ADJUST_CFA_OFFSET -0x30
  1381. pushq $0
  1382. CFI_ADJUST_CFA_OFFSET 8
  1383. pushq %r11
  1384. CFI_ADJUST_CFA_OFFSET 8
  1385. pushq %rcx
  1386. CFI_ADJUST_CFA_OFFSET 8
  1387. jmp general_protection
  1388. CFI_RESTORE_STATE
  1389. 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
  1390. movq (%rsp),%rcx
  1391. CFI_RESTORE rcx
  1392. movq 8(%rsp),%r11
  1393. CFI_RESTORE r11
  1394. addq $0x30,%rsp
  1395. CFI_ADJUST_CFA_OFFSET -0x30
  1396. pushq $0
  1397. CFI_ADJUST_CFA_OFFSET 8
  1398. SAVE_ALL
  1399. jmp error_exit
  1400. CFI_ENDPROC
  1401. END(xen_failsafe_callback)
  1402. #endif /* CONFIG_XEN */