entry_64.S

  1. /*
  2. * linux/arch/x86_64/entry.S
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
  6. * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
  7. */
  8. /*
  9. * entry.S contains the system-call and fault low-level handling routines.
  10. *
  11. * NOTE: This code handles signal recognition, which happens after every
  12. * interrupt and after each system call.
  13. *
  14. * Normal syscalls and interrupts don't save a full stack frame; that is
  15. * only done for syscall tracing, signals or fork/exec et al.
  16. *
  17. * A note on terminology:
  18. * - top of stack: Architecture defined interrupt frame from SS to RIP
  19. * at the top of the kernel process stack.
  20. * - partial stack frame: partially saved registers up to R11.
  21. * - full stack frame: Like partial stack frame, but all registers saved.
  22. *
  23. * Some macro usage:
  24. * - CFI macros are used to generate dwarf2 unwind information for better
  25. * backtraces. They don't change any code.
  26. * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
  27. * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
  28. * There are unfortunately lots of special cases where some registers
  29. * are not touched. The macro is a big mess that should be cleaned up.
  30. * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
  31. * Gives a full stack frame.
  32. * - ENTRY/END Define functions in the symbol table.
  33. * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  34. * frame that is otherwise undefined after a SYSCALL
  35. * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
  36. * - errorentry/paranoidentry/zeroentry - Define exception entry points.
  37. */
  38. #include <linux/linkage.h>
  39. #include <asm/segment.h>
  40. #include <asm/cache.h>
  41. #include <asm/errno.h>
  42. #include <asm/dwarf2.h>
  43. #include <asm/calling.h>
  44. #include <asm/asm-offsets.h>
  45. #include <asm/msr.h>
  46. #include <asm/unistd.h>
  47. #include <asm/thread_info.h>
  48. #include <asm/hw_irq.h>
  49. #include <asm/page.h>
  50. #include <asm/irqflags.h>
  51. #include <asm/paravirt.h>
  52. #include <asm/ftrace.h>
  53. /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
  54. #include <linux/elf-em.h>
  55. #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
  56. #define __AUDIT_ARCH_64BIT 0x80000000
  57. #define __AUDIT_ARCH_LE 0x40000000
  58. .code64
  59. /*
  60. * Some macros to hide the most frequently occurring CFI annotations.
  61. */
  62. .macro pushq_cfi reg
  63. pushq \reg
  64. CFI_ADJUST_CFA_OFFSET 8
  65. .endm
  66. .macro popq_cfi reg
  67. popq \reg
  68. CFI_ADJUST_CFA_OFFSET -8
  69. .endm
  70. .macro movq_cfi reg offset=0
  71. movq %\reg, \offset(%rsp)
  72. CFI_REL_OFFSET \reg, \offset
  73. .endm
  74. .macro movq_cfi_restore offset reg
  75. movq \offset(%rsp), %\reg
  76. CFI_RESTORE \reg
  77. .endm
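/*
 * Note on the helpers above: each one pairs a stack operation with the
 * matching dwarf2 annotation so the unwind bookkeeping cannot drift from
 * the code. pushq_cfi/popq_cfi adjust the CFA by +/-8, movq_cfi records
 * where a register was spilled relative to %rsp, and movq_cfi_restore
 * marks that register as restored again.
 */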
  78. #ifdef CONFIG_FUNCTION_TRACER
  79. #ifdef CONFIG_DYNAMIC_FTRACE
  80. ENTRY(mcount)
  81. retq
  82. END(mcount)
  83. ENTRY(ftrace_caller)
  84. /* taken from glibc */
  85. subq $0x38, %rsp
  86. movq %rax, (%rsp)
  87. movq %rcx, 8(%rsp)
  88. movq %rdx, 16(%rsp)
  89. movq %rsi, 24(%rsp)
  90. movq %rdi, 32(%rsp)
  91. movq %r8, 40(%rsp)
  92. movq %r9, 48(%rsp)
  93. movq 0x38(%rsp), %rdi
  94. movq 8(%rbp), %rsi
  95. subq $MCOUNT_INSN_SIZE, %rdi
  96. .globl ftrace_call
  97. ftrace_call:
  98. call ftrace_stub
  99. movq 48(%rsp), %r9
  100. movq 40(%rsp), %r8
  101. movq 32(%rsp), %rdi
  102. movq 24(%rsp), %rsi
  103. movq 16(%rsp), %rdx
  104. movq 8(%rsp), %rcx
  105. movq (%rsp), %rax
  106. addq $0x38, %rsp
  107. .globl ftrace_stub
  108. ftrace_stub:
  109. retq
  110. END(ftrace_caller)
  111. #else /* ! CONFIG_DYNAMIC_FTRACE */
  112. ENTRY(mcount)
  113. cmpq $ftrace_stub, ftrace_trace_function
  114. jnz trace
  115. .globl ftrace_stub
  116. ftrace_stub:
  117. retq
  118. trace:
  119. /* taken from glibc */
  120. subq $0x38, %rsp
  121. movq %rax, (%rsp)
  122. movq %rcx, 8(%rsp)
  123. movq %rdx, 16(%rsp)
  124. movq %rsi, 24(%rsp)
  125. movq %rdi, 32(%rsp)
  126. movq %r8, 40(%rsp)
  127. movq %r9, 48(%rsp)
  128. movq 0x38(%rsp), %rdi
  129. movq 8(%rbp), %rsi
  130. subq $MCOUNT_INSN_SIZE, %rdi
  131. call *ftrace_trace_function
  132. movq 48(%rsp), %r9
  133. movq 40(%rsp), %r8
  134. movq 32(%rsp), %rdi
  135. movq 24(%rsp), %rsi
  136. movq 16(%rsp), %rdx
  137. movq 8(%rsp), %rcx
  138. movq (%rsp), %rax
  139. addq $0x38, %rsp
  140. jmp ftrace_stub
  141. END(mcount)
  142. #endif /* CONFIG_DYNAMIC_FTRACE */
  143. #endif /* CONFIG_FUNCTION_TRACER */
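/*
 * Both mcount variants above share the same register-save layout: the
 * 0x38-byte area holds the seven call-clobbered registers that can carry
 * arguments or the return value (rax, rcx, rdx, rsi, rdi, r8, r9). The
 * tracer is then called with %rdi pointing back at the mcount call site
 * (the return address at 0x38(%rsp) minus MCOUNT_INSN_SIZE) and %rsi
 * holding the parent's return address taken from 8(%rbp).
 */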
  144. #ifndef CONFIG_PREEMPT
  145. #define retint_kernel retint_restore_args
  146. #endif
  147. #ifdef CONFIG_PARAVIRT
  148. ENTRY(native_usergs_sysret64)
  149. swapgs
  150. sysretq
  151. #endif /* CONFIG_PARAVIRT */
  152. .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
  153. #ifdef CONFIG_TRACE_IRQFLAGS
  154. bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
  155. jnc 1f
  156. TRACE_IRQS_ON
  157. 1:
  158. #endif
  159. .endm
  160. /*
  161. * C code is not supposed to know about the undefined top of stack. Every time
  162. * a C function with a pt_regs argument is called from the SYSCALL based
  163. * fast path, FIXUP_TOP_OF_STACK is needed.
  164. * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
  165. * manipulation.
  166. */
  167. /* %rsp:at FRAMEEND */
  168. .macro FIXUP_TOP_OF_STACK tmp offset=0
  169. movq %gs:pda_oldrsp,\tmp
  170. movq \tmp,RSP+\offset(%rsp)
  171. movq $__USER_DS,SS+\offset(%rsp)
  172. movq $__USER_CS,CS+\offset(%rsp)
  173. movq $-1,RCX+\offset(%rsp)
  174. movq R11+\offset(%rsp),\tmp /* get eflags */
  175. movq \tmp,EFLAGS+\offset(%rsp)
  176. .endm
  177. .macro RESTORE_TOP_OF_STACK tmp offset=0
  178. movq RSP+\offset(%rsp),\tmp
  179. movq \tmp,%gs:pda_oldrsp
  180. movq EFLAGS+\offset(%rsp),\tmp
  181. movq \tmp,R11+\offset(%rsp)
  182. .endm
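/*
 * SYSCALL saves nothing on the kernel stack: the return RIP lands in
 * %rcx, RFLAGS in %r11, and the user %rsp is stashed in pda_oldrsp.
 * FIXUP_TOP_OF_STACK therefore fills in the RSP, SS, CS and EFLAGS slots
 * of pt_regs from those values (EFLAGS comes from the saved %r11) and
 * poisons the RCX slot with -1, since the user's %rcx was already
 * destroyed by SYSCALL itself.  RESTORE_TOP_OF_STACK copies a possibly
 * modified RSP back into pda_oldrsp and EFLAGS back into the R11 slot,
 * which is what the SYSRET path will reload.
 */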
  183. .macro FAKE_STACK_FRAME child_rip
  184. /* push in order ss, rsp, eflags, cs, rip */
  185. xorl %eax, %eax
  186. pushq $__KERNEL_DS /* ss */
  187. CFI_ADJUST_CFA_OFFSET 8
  188. /*CFI_REL_OFFSET ss,0*/
  189. pushq %rax /* rsp */
  190. CFI_ADJUST_CFA_OFFSET 8
  191. CFI_REL_OFFSET rsp,0
  192. pushq $(1<<9) /* eflags - interrupts on */
  193. CFI_ADJUST_CFA_OFFSET 8
  194. /*CFI_REL_OFFSET rflags,0*/
  195. pushq $__KERNEL_CS /* cs */
  196. CFI_ADJUST_CFA_OFFSET 8
  197. /*CFI_REL_OFFSET cs,0*/
  198. pushq \child_rip /* rip */
  199. CFI_ADJUST_CFA_OFFSET 8
  200. CFI_REL_OFFSET rip,0
  201. pushq %rax /* orig rax */
  202. CFI_ADJUST_CFA_OFFSET 8
  203. .endm
  204. .macro UNFAKE_STACK_FRAME
  205. addq $8*6, %rsp
  206. CFI_ADJUST_CFA_OFFSET -(6*8)
  207. .endm
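/*
 * FAKE_STACK_FRAME pushes six 8-byte words (ss, rsp, eflags, cs, rip,
 * orig_rax) so that code which never entered through an interrupt or
 * syscall, such as kernel_thread and kernel_execve, still has the
 * hardware-style frame the exit paths expect.  UNFAKE_STACK_FRAME simply
 * pops those 6*8 = 48 bytes again.
 */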
  208. /*
  209. * initial frame state for interrupts (and exceptions without error code)
  210. */
  211. .macro EMPTY_FRAME start=1 offset=0
  212. .if \start
  213. CFI_STARTPROC simple
  214. CFI_SIGNAL_FRAME
  215. CFI_DEF_CFA rsp,8+\offset
  216. .else
  217. CFI_DEF_CFA_OFFSET 8+\offset
  218. .endif
  219. .endm
  220. /*
  221. * initial frame state for interrupts (and exceptions without error code)
  222. */
  223. .macro INTR_FRAME start=1 offset=0
  224. EMPTY_FRAME \start, SS+8+\offset-RIP
  225. /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
  226. CFI_REL_OFFSET rsp, RSP+\offset-RIP
  227. /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
  228. /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
  229. CFI_REL_OFFSET rip, RIP+\offset-RIP
  230. .endm
  231. /*
  232. * initial frame state for exceptions with error code (and interrupts
  233. * with vector already pushed)
  234. */
  235. .macro XCPT_FRAME start=1 offset=0
  236. INTR_FRAME \start, RIP+\offset-ORIG_RAX
  237. /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
  238. .endm
  239. /*
  240. * frame that enables calling into C.
  241. */
  242. .macro PARTIAL_FRAME start=1 offset=0
  243. XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
  244. CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
  245. CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
  246. CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
  247. CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
  248. CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
  249. CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
  250. CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
  251. CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
  252. CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
  253. .endm
  254. /*
  255. * frame that enables passing a complete pt_regs to a C function.
  256. */
  257. .macro DEFAULT_FRAME start=1 offset=0
  258. PARTIAL_FRAME \start, R11+\offset-R15
  259. CFI_REL_OFFSET rbx, RBX+\offset
  260. CFI_REL_OFFSET rbp, RBP+\offset
  261. CFI_REL_OFFSET r12, R12+\offset
  262. CFI_REL_OFFSET r13, R13+\offset
  263. CFI_REL_OFFSET r14, R14+\offset
  264. CFI_REL_OFFSET r15, R15+\offset
  265. .endm
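/*
 * The frame macros build on each other: EMPTY_FRAME sets up the bare CFA,
 * INTR_FRAME adds the hardware interrupt frame (rip/rsp), XCPT_FRAME
 * accounts for an already-pushed error code, PARTIAL_FRAME describes the
 * registers saved by SAVE_ARGS, and DEFAULT_FRAME additionally describes
 * the callee-saved registers filled in by SAVE_REST, i.e. a full pt_regs.
 */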
  266. /* save partial stack frame */
  267. ENTRY(save_args)
  268. XCPT_FRAME
  269. cld
  270. movq_cfi rdi, RDI+16-ARGOFFSET
  271. movq_cfi rsi, RSI+16-ARGOFFSET
  272. movq_cfi rdx, RDX+16-ARGOFFSET
  273. movq_cfi rcx, RCX+16-ARGOFFSET
  274. movq_cfi rax, RAX+16-ARGOFFSET
  275. movq_cfi r8, R8+16-ARGOFFSET
  276. movq_cfi r9, R9+16-ARGOFFSET
  277. movq_cfi r10, R10+16-ARGOFFSET
  278. movq_cfi r11, R11+16-ARGOFFSET
  279. leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
  280. movq_cfi rbp, 8 /* push %rbp */
  281. leaq 8(%rsp), %rbp /* mov %rsp, %rbp */
  282. testl $3, CS(%rdi)
  283. je 1f
  284. SWAPGS
  285. /*
  286. * irqcount is used to check if a CPU is already on an interrupt stack
  287. * or not. While this is essentially redundant with preempt_count it is
  288. * a little cheaper to use a separate counter in the PDA (short of
  289. * moving irq_enter into assembly, which would be too much work)
  290. */
  291. 1: incl %gs:pda_irqcount
  292. jne 2f
  293. popq_cfi %rax /* move return address... */
  294. mov %gs:pda_irqstackptr,%rsp
  295. EMPTY_FRAME 0
  296. pushq_cfi %rax /* ... to the new stack */
  297. /*
  298. * We entered an interrupt context - irqs are off:
  299. */
  300. 2: TRACE_IRQS_OFF
  301. ret
  302. CFI_ENDPROC
  303. END(save_args)
  304. ENTRY(save_rest)
  305. PARTIAL_FRAME 1 REST_SKIP+8
  306. movq 5*8+16(%rsp), %r11 /* save return address */
  307. movq_cfi rbx, RBX+16
  308. movq_cfi rbp, RBP+16
  309. movq_cfi r12, R12+16
  310. movq_cfi r13, R13+16
  311. movq_cfi r14, R14+16
  312. movq_cfi r15, R15+16
  313. movq %r11, 8(%rsp) /* return address */
  314. FIXUP_TOP_OF_STACK %r11, 16
  315. ret
  316. CFI_ENDPROC
  317. END(save_rest)
  318. /* save complete stack frame */
  319. ENTRY(save_paranoid)
  320. XCPT_FRAME 1 RDI+8
  321. cld
  322. movq_cfi rdi, RDI+8
  323. movq_cfi rsi, RSI+8
  324. movq_cfi rdx, RDX+8
  325. movq_cfi rcx, RCX+8
  326. movq_cfi rax, RAX+8
  327. movq_cfi r8, R8+8
  328. movq_cfi r9, R9+8
  329. movq_cfi r10, R10+8
  330. movq_cfi r11, R11+8
  331. movq_cfi rbx, RBX+8
  332. movq_cfi rbp, RBP+8
  333. movq_cfi r12, R12+8
  334. movq_cfi r13, R13+8
  335. movq_cfi r14, R14+8
  336. movq_cfi r15, R15+8
  337. movl $1,%ebx
  338. movl $MSR_GS_BASE,%ecx
  339. rdmsr
  340. testl %edx,%edx
  341. js 1f /* negative -> in kernel */
  342. SWAPGS
  343. xorl %ebx,%ebx
  344. 1: ret
  345. CFI_ENDPROC
  346. END(save_paranoid)
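/*
 * save_paranoid decides whether swapgs is needed by reading MSR_GS_BASE
 * directly: a negative high half means a kernel address, so GS already
 * points at the PDA and %ebx stays 1; otherwise it swaps and clears %ebx.
 * paranoid_exit later uses that flag to undo the swap.
 */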
  347. /*
  348. * A newly forked process directly context switches into this.
  349. */
  350. /* rdi: prev */
  351. ENTRY(ret_from_fork)
  352. DEFAULT_FRAME
  353. push kernel_eflags(%rip)
  354. CFI_ADJUST_CFA_OFFSET 8
  355. popf # reset kernel eflags
  356. CFI_ADJUST_CFA_OFFSET -8
  357. call schedule_tail
  358. GET_THREAD_INFO(%rcx)
  359. testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
  360. jnz rff_trace
  361. rff_action:
  362. RESTORE_REST
  363. testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
  364. je int_ret_from_sys_call
  365. testl $_TIF_IA32,TI_flags(%rcx)
  366. jnz int_ret_from_sys_call
  367. RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
  368. jmp ret_from_sys_call
  369. rff_trace:
  370. movq %rsp,%rdi
  371. call syscall_trace_leave
  372. GET_THREAD_INFO(%rcx)
  373. jmp rff_action
  374. CFI_ENDPROC
  375. END(ret_from_fork)
  376. /*
  377. * System call entry. Up to 6 arguments in registers are supported.
  378. *
  379. * SYSCALL does not save anything on the stack and does not change the
  380. * stack pointer.
  381. */
  382. /*
  383. * Register setup:
  384. * rax system call number
  385. * rdi arg0
  386. * rcx return address for syscall/sysret, C arg3
  387. * rsi arg1
  388. * rdx arg2
  389. * r10 arg3 (--> moved to rcx for C)
  390. * r8 arg4
  391. * r9 arg5
  392. * r11 eflags for syscall/sysret, temporary for C
  393. * r12-r15,rbp,rbx saved by C code, not touched.
  394. *
  395. * Interrupts are off on entry.
  396. * Only called from user space.
  397. *
  398. * XXX if we had a free scratch register we could save the RSP into the stack frame
  399. * and report it properly in ps. Unfortunately we haven't.
  400. *
  401. * When the user can change the frames, always force IRET. That is because
  402. * IRET deals with non-canonical addresses better. SYSRET has trouble
  403. * with them due to bugs in both AMD and Intel CPUs.
  404. */
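/*
 * Example (illustration only, not part of the entry path): a
 * write(fd, buf, count) call arrives here with %rax = __NR_write (1 on
 * x86-64), %rdi = fd, %rsi = buf, %rdx = count.  %rcx holds the user
 * return address and %r11 the user RFLAGS, both filled in by the SYSCALL
 * instruction, which is why the 4th argument travels in %r10 and is moved
 * into %rcx before the sys_call_table dispatch below.
 */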
  405. ENTRY(system_call)
  406. CFI_STARTPROC simple
  407. CFI_SIGNAL_FRAME
  408. CFI_DEF_CFA rsp,PDA_STACKOFFSET
  409. CFI_REGISTER rip,rcx
  410. /*CFI_REGISTER rflags,r11*/
  411. SWAPGS_UNSAFE_STACK
  412. /*
  413. * A hypervisor implementation might want to use a label
  414. * after the swapgs, so that it can do the swapgs
  415. * for the guest and jump here on syscall.
  416. */
  417. ENTRY(system_call_after_swapgs)
  418. movq %rsp,%gs:pda_oldrsp
  419. movq %gs:pda_kernelstack,%rsp
  420. /*
  421. * No need to follow this irqs off/on section - it's straight
  422. * and short:
  423. */
  424. ENABLE_INTERRUPTS(CLBR_NONE)
  425. SAVE_ARGS 8,1
  426. movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
  427. movq %rcx,RIP-ARGOFFSET(%rsp)
  428. CFI_REL_OFFSET rip,RIP-ARGOFFSET
  429. GET_THREAD_INFO(%rcx)
  430. testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
  431. jnz tracesys
  432. system_call_fastpath:
  433. cmpq $__NR_syscall_max,%rax
  434. ja badsys
  435. movq %r10,%rcx
  436. call *sys_call_table(,%rax,8) # XXX: rip relative
  437. movq %rax,RAX-ARGOFFSET(%rsp)
  438. /*
  439. * Syscall return path ending with SYSRET (fast path)
  440. * Has incomplete stack frame and undefined top of stack.
  441. */
  442. ret_from_sys_call:
  443. movl $_TIF_ALLWORK_MASK,%edi
  444. /* edi: flagmask */
  445. sysret_check:
  446. LOCKDEP_SYS_EXIT
  447. GET_THREAD_INFO(%rcx)
  448. DISABLE_INTERRUPTS(CLBR_NONE)
  449. TRACE_IRQS_OFF
  450. movl TI_flags(%rcx),%edx
  451. andl %edi,%edx
  452. jnz sysret_careful
  453. CFI_REMEMBER_STATE
  454. /*
  455. * sysretq will re-enable interrupts:
  456. */
  457. TRACE_IRQS_ON
  458. movq RIP-ARGOFFSET(%rsp),%rcx
  459. CFI_REGISTER rip,rcx
  460. RESTORE_ARGS 0,-ARG_SKIP,1
  461. /*CFI_REGISTER rflags,r11*/
  462. movq %gs:pda_oldrsp, %rsp
  463. USERGS_SYSRET64
  464. CFI_RESTORE_STATE
  465. /* Handle reschedules */
  466. /* edx: work, edi: workmask */
  467. sysret_careful:
  468. bt $TIF_NEED_RESCHED,%edx
  469. jnc sysret_signal
  470. TRACE_IRQS_ON
  471. ENABLE_INTERRUPTS(CLBR_NONE)
  472. pushq %rdi
  473. CFI_ADJUST_CFA_OFFSET 8
  474. call schedule
  475. popq %rdi
  476. CFI_ADJUST_CFA_OFFSET -8
  477. jmp sysret_check
  478. /* Handle a signal */
  479. sysret_signal:
  480. TRACE_IRQS_ON
  481. ENABLE_INTERRUPTS(CLBR_NONE)
  482. #ifdef CONFIG_AUDITSYSCALL
  483. bt $TIF_SYSCALL_AUDIT,%edx
  484. jc sysret_audit
  485. #endif
  486. /* edx: work flags (arg3) */
  487. leaq do_notify_resume(%rip),%rax
  488. leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
  489. xorl %esi,%esi # oldset -> arg2
  490. call ptregscall_common
  491. movl $_TIF_WORK_MASK,%edi
  492. /* Use IRET because user could have changed frame. This
  493. works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
  494. DISABLE_INTERRUPTS(CLBR_NONE)
  495. TRACE_IRQS_OFF
  496. jmp int_with_check
  497. badsys:
  498. movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
  499. jmp ret_from_sys_call
  500. #ifdef CONFIG_AUDITSYSCALL
  501. /*
  502. * Fast path for syscall audit without full syscall trace.
  503. * We just call audit_syscall_entry() directly, and then
  504. * jump back to the normal fast path.
  505. */
  506. auditsys:
  507. movq %r10,%r9 /* 6th arg: 4th syscall arg */
  508. movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
  509. movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
  510. movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
  511. movq %rax,%rsi /* 2nd arg: syscall number */
  512. movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
  513. call audit_syscall_entry
  514. LOAD_ARGS 0 /* reload call-clobbered registers */
  515. jmp system_call_fastpath
  516. /*
  517. * Return fast path for syscall audit. Call audit_syscall_exit()
  518. * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
  519. * masked off.
  520. */
  521. sysret_audit:
  522. movq %rax,%rsi /* second arg, syscall return value */
  523. cmpq $0,%rax /* is it < 0? */
  524. setl %al /* 1 if so, 0 if not */
  525. movzbl %al,%edi /* zero-extend that into %edi */
  526. inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
  527. call audit_syscall_exit
  528. movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
  529. jmp sysret_check
  530. #endif /* CONFIG_AUDITSYSCALL */
  531. /* Do syscall tracing */
  532. tracesys:
  533. #ifdef CONFIG_AUDITSYSCALL
  534. testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
  535. jz auditsys
  536. #endif
  537. SAVE_REST
  538. movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
  539. FIXUP_TOP_OF_STACK %rdi
  540. movq %rsp,%rdi
  541. call syscall_trace_enter
  542. /*
  543. * Reload arg registers from stack in case ptrace changed them.
  544. * We don't reload %rax because syscall_trace_enter() returned
  545. * the value it wants us to use in the table lookup.
  546. */
  547. LOAD_ARGS ARGOFFSET, 1
  548. RESTORE_REST
  549. cmpq $__NR_syscall_max,%rax
  550. ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
  551. movq %r10,%rcx /* fixup for C */
  552. call *sys_call_table(,%rax,8)
  553. movq %rax,RAX-ARGOFFSET(%rsp)
  554. /* Use IRET because user could have changed frame */
  555. /*
  556. * Syscall return path ending with IRET.
  557. * Has correct top of stack, but partial stack frame.
  558. */
  559. .globl int_ret_from_sys_call
  560. .globl int_with_check
  561. int_ret_from_sys_call:
  562. DISABLE_INTERRUPTS(CLBR_NONE)
  563. TRACE_IRQS_OFF
  564. testl $3,CS-ARGOFFSET(%rsp)
  565. je retint_restore_args
  566. movl $_TIF_ALLWORK_MASK,%edi
  567. /* edi: mask to check */
  568. int_with_check:
  569. LOCKDEP_SYS_EXIT_IRQ
  570. GET_THREAD_INFO(%rcx)
  571. movl TI_flags(%rcx),%edx
  572. andl %edi,%edx
  573. jnz int_careful
  574. andl $~TS_COMPAT,TI_status(%rcx)
  575. jmp retint_swapgs
  576. /* Either reschedule or signal or syscall exit tracking needed. */
  577. /* First do a reschedule test. */
  578. /* edx: work, edi: workmask */
  579. int_careful:
  580. bt $TIF_NEED_RESCHED,%edx
  581. jnc int_very_careful
  582. TRACE_IRQS_ON
  583. ENABLE_INTERRUPTS(CLBR_NONE)
  584. pushq %rdi
  585. CFI_ADJUST_CFA_OFFSET 8
  586. call schedule
  587. popq %rdi
  588. CFI_ADJUST_CFA_OFFSET -8
  589. DISABLE_INTERRUPTS(CLBR_NONE)
  590. TRACE_IRQS_OFF
  591. jmp int_with_check
  592. /* handle signals and tracing -- both require a full stack frame */
  593. int_very_careful:
  594. TRACE_IRQS_ON
  595. ENABLE_INTERRUPTS(CLBR_NONE)
  596. SAVE_REST
  597. /* Check for syscall exit trace */
  598. testl $_TIF_WORK_SYSCALL_EXIT,%edx
  599. jz int_signal
  600. pushq %rdi
  601. CFI_ADJUST_CFA_OFFSET 8
  602. leaq 8(%rsp),%rdi # &ptregs -> arg1
  603. call syscall_trace_leave
  604. popq %rdi
  605. CFI_ADJUST_CFA_OFFSET -8
  606. andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
  607. jmp int_restore_rest
  608. int_signal:
  609. testl $_TIF_DO_NOTIFY_MASK,%edx
  610. jz 1f
  611. movq %rsp,%rdi # &ptregs -> arg1
  612. xorl %esi,%esi # oldset -> arg2
  613. call do_notify_resume
  614. 1: movl $_TIF_WORK_MASK,%edi
  615. int_restore_rest:
  616. RESTORE_REST
  617. DISABLE_INTERRUPTS(CLBR_NONE)
  618. TRACE_IRQS_OFF
  619. jmp int_with_check
  620. CFI_ENDPROC
  621. END(system_call)
  622. /*
  623. * Certain special system calls that need to save a complete full stack frame.
  624. */
  625. .macro PTREGSCALL label,func,arg
  626. ENTRY(\label)
  627. PARTIAL_FRAME 1 8 /* offset 8: return address */
  628. subq $REST_SKIP, %rsp
  629. CFI_ADJUST_CFA_OFFSET REST_SKIP
  630. call save_rest
  631. DEFAULT_FRAME 0 8 /* offset 8: return address */
  632. leaq 8(%rsp), \arg /* pt_regs pointer */
  633. call \func
  634. jmp ptregscall_common
  635. CFI_ENDPROC
  636. END(\label)
  637. .endm
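/*
 * Each PTREGSCALL stub reserves REST_SKIP bytes, lets save_rest complete
 * the pt_regs with the callee-saved registers, passes a pointer to that
 * pt_regs in the requested argument register and calls the real handler.
 * The common tail below (ptregscall_common) resyncs the top of stack,
 * restores the callee-saved registers and uses "ret $REST_SKIP" to pop
 * the reserved area on the way out.
 */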
  638. PTREGSCALL stub_clone, sys_clone, %r8
  639. PTREGSCALL stub_fork, sys_fork, %rdi
  640. PTREGSCALL stub_vfork, sys_vfork, %rdi
  641. PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
  642. PTREGSCALL stub_iopl, sys_iopl, %rsi
  643. ENTRY(ptregscall_common)
  644. DEFAULT_FRAME 1 8 /* offset 8: return address */
  645. RESTORE_TOP_OF_STACK %r11, 8
  646. movq_cfi_restore R15+8, r15
  647. movq_cfi_restore R14+8, r14
  648. movq_cfi_restore R13+8, r13
  649. movq_cfi_restore R12+8, r12
  650. movq_cfi_restore RBP+8, rbp
  651. movq_cfi_restore RBX+8, rbx
  652. ret $REST_SKIP /* pop extended registers */
  653. CFI_ENDPROC
  654. END(ptregscall_common)
  655. ENTRY(stub_execve)
  656. CFI_STARTPROC
  657. popq %r11
  658. CFI_ADJUST_CFA_OFFSET -8
  659. CFI_REGISTER rip, r11
  660. SAVE_REST
  661. FIXUP_TOP_OF_STACK %r11
  662. movq %rsp, %rcx
  663. call sys_execve
  664. RESTORE_TOP_OF_STACK %r11
  665. movq %rax,RAX(%rsp)
  666. RESTORE_REST
  667. jmp int_ret_from_sys_call
  668. CFI_ENDPROC
  669. END(stub_execve)
  670. /*
  671. * sigreturn is special because it needs to restore all registers on return.
  672. * This cannot be done with SYSRET, so use the IRET return path instead.
  673. */
  674. ENTRY(stub_rt_sigreturn)
  675. CFI_STARTPROC
  676. addq $8, %rsp
  677. CFI_ADJUST_CFA_OFFSET -8
  678. SAVE_REST
  679. movq %rsp,%rdi
  680. FIXUP_TOP_OF_STACK %r11
  681. call sys_rt_sigreturn
  682. movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
  683. RESTORE_REST
  684. jmp int_ret_from_sys_call
  685. CFI_ENDPROC
  686. END(stub_rt_sigreturn)
  687. /*
  688. * Build the entry stubs and pointer table with some assembler magic.
  689. * We pack 7 stubs into a single 32-byte chunk, which will fit in a
  690. * single cache line on all modern x86 implementations.
  691. */
  692. .section .init.rodata,"a"
  693. ENTRY(interrupt)
  694. .text
  695. .p2align 5
  696. .p2align CONFIG_X86_L1_CACHE_SHIFT
  697. ENTRY(irq_entries_start)
  698. INTR_FRAME
  699. vector=FIRST_EXTERNAL_VECTOR
  700. .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
  701. .balign 32
  702. .rept 7
  703. .if vector < NR_VECTORS
  704. .if vector <> FIRST_EXTERNAL_VECTOR
  705. CFI_ADJUST_CFA_OFFSET -8
  706. .endif
  707. 1: pushq $(~vector+0x80) /* Note: always in signed byte range */
  708. CFI_ADJUST_CFA_OFFSET 8
  709. .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
  710. jmp 2f
  711. .endif
  712. .previous
  713. .quad 1b
  714. .text
  715. vector=vector+1
  716. .endif
  717. .endr
  718. 2: jmp common_interrupt
  719. .endr
  720. CFI_ENDPROC
  721. END(irq_entries_start)
  722. .previous
  723. END(interrupt)
  724. .previous
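/*
 * Worked example of the vector encoding, for vector 32 (the first
 * external one): the stub pushes ~32 + 0x80 = 0x5f, which fits in a
 * signed byte and so keeps each push instruction short; common_interrupt
 * below then adds -0x80, giving 0x5f - 0x80 = -33 = ~32.  The value that
 * ends up in the orig_rax slot is thus ~vector, in the [-256,-1] range,
 * and the C handler can recover the vector with a bitwise NOT.
 */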
  725. /*
  726. * Interrupt entry/exit.
  727. *
  728. * Interrupt entry points save only the call-clobbered registers in the fast path.
  729. *
  730. * Entry runs with interrupts off.
  731. */
  732. /* 0(%rsp): ~(interrupt number) */
  733. .macro interrupt func
  734. subq $10*8, %rsp
  735. CFI_ADJUST_CFA_OFFSET 10*8
  736. call save_args
  737. PARTIAL_FRAME 0
  738. call \func
  739. .endm
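/*
 * The interrupt macro reserves the 10*8 bytes that the partial frame
 * needs and lets save_args fill them in; save_args also switches to the
 * per-CPU interrupt stack (unless we are already on it) and leaves %rdi
 * pointing at the partial pt_regs, which becomes the handler's first
 * argument.
 */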
  740. /*
  741. * The interrupt stubs push (~vector+0x80) onto the stack and
  742. * then jump to common_interrupt.
  743. */
  744. .p2align CONFIG_X86_L1_CACHE_SHIFT
  745. common_interrupt:
  746. XCPT_FRAME
  747. addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
  748. interrupt do_IRQ
  749. /* 0(%rsp): oldrsp-ARGOFFSET */
  750. ret_from_intr:
  751. DISABLE_INTERRUPTS(CLBR_NONE)
  752. TRACE_IRQS_OFF
  753. decl %gs:pda_irqcount
  754. leaveq
  755. CFI_DEF_CFA_REGISTER rsp
  756. CFI_ADJUST_CFA_OFFSET -8
  757. exit_intr:
  758. GET_THREAD_INFO(%rcx)
  759. testl $3,CS-ARGOFFSET(%rsp)
  760. je retint_kernel
  761. /* Interrupt came from user space */
  762. /*
  763. * Has a correct top of stack, but a partial stack frame
  764. * %rcx: thread info. Interrupts off.
  765. */
  766. retint_with_reschedule:
  767. movl $_TIF_WORK_MASK,%edi
  768. retint_check:
  769. LOCKDEP_SYS_EXIT_IRQ
  770. movl TI_flags(%rcx),%edx
  771. andl %edi,%edx
  772. CFI_REMEMBER_STATE
  773. jnz retint_careful
  774. retint_swapgs: /* return to user-space */
  775. /*
  776. * The iretq could re-enable interrupts:
  777. */
  778. DISABLE_INTERRUPTS(CLBR_ANY)
  779. TRACE_IRQS_IRETQ
  780. SWAPGS
  781. jmp restore_args
  782. retint_restore_args: /* return to kernel space */
  783. DISABLE_INTERRUPTS(CLBR_ANY)
  784. /*
  785. * The iretq could re-enable interrupts:
  786. */
  787. TRACE_IRQS_IRETQ
  788. restore_args:
  789. RESTORE_ARGS 0,8,0
  790. irq_return:
  791. INTERRUPT_RETURN
  792. .section __ex_table, "a"
  793. .quad irq_return, bad_iret
  794. .previous
  795. #ifdef CONFIG_PARAVIRT
  796. ENTRY(native_iret)
  797. iretq
  798. .section __ex_table,"a"
  799. .quad native_iret, bad_iret
  800. .previous
  801. #endif
  802. .section .fixup,"ax"
  803. bad_iret:
  804. /*
  805. * The iret traps when the %cs or %ss being restored is bogus.
  806. * We've lost the original trap vector and error code.
  807. * #GPF is the most likely one to get for an invalid selector.
  808. * So pretend we completed the iret and took the #GPF in user mode.
  809. *
  810. * We are now running with the kernel GS after exception recovery.
  811. * But error_entry expects us to have user GS to match the user %cs,
  812. * so swap back.
  813. */
  814. pushq $0
  815. SWAPGS
  816. jmp general_protection
  817. .previous
  818. /* edi: workmask, edx: work */
  819. retint_careful:
  820. CFI_RESTORE_STATE
  821. bt $TIF_NEED_RESCHED,%edx
  822. jnc retint_signal
  823. TRACE_IRQS_ON
  824. ENABLE_INTERRUPTS(CLBR_NONE)
  825. pushq %rdi
  826. CFI_ADJUST_CFA_OFFSET 8
  827. call schedule
  828. popq %rdi
  829. CFI_ADJUST_CFA_OFFSET -8
  830. GET_THREAD_INFO(%rcx)
  831. DISABLE_INTERRUPTS(CLBR_NONE)
  832. TRACE_IRQS_OFF
  833. jmp retint_check
  834. retint_signal:
  835. testl $_TIF_DO_NOTIFY_MASK,%edx
  836. jz retint_swapgs
  837. TRACE_IRQS_ON
  838. ENABLE_INTERRUPTS(CLBR_NONE)
  839. SAVE_REST
  840. movq $-1,ORIG_RAX(%rsp)
  841. xorl %esi,%esi # oldset
  842. movq %rsp,%rdi # &pt_regs
  843. call do_notify_resume
  844. RESTORE_REST
  845. DISABLE_INTERRUPTS(CLBR_NONE)
  846. TRACE_IRQS_OFF
  847. GET_THREAD_INFO(%rcx)
  848. jmp retint_with_reschedule
  849. #ifdef CONFIG_PREEMPT
  850. /* Returning to kernel space. Check if we need preemption */
  851. /* rcx: threadinfo. interrupts off. */
  852. ENTRY(retint_kernel)
  853. cmpl $0,TI_preempt_count(%rcx)
  854. jnz retint_restore_args
  855. bt $TIF_NEED_RESCHED,TI_flags(%rcx)
  856. jnc retint_restore_args
  857. bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
  858. jnc retint_restore_args
  859. call preempt_schedule_irq
  860. jmp exit_intr
  861. #endif
  862. CFI_ENDPROC
  863. END(common_interrupt)
  864. /*
  865. * APIC interrupts.
  866. */
  867. .p2align 5
  868. .macro apicinterrupt num,func
  869. INTR_FRAME
  870. pushq $~(\num)
  871. CFI_ADJUST_CFA_OFFSET 8
  872. interrupt \func
  873. jmp ret_from_intr
  874. CFI_ENDPROC
  875. .endm
  876. ENTRY(thermal_interrupt)
  877. apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
  878. END(thermal_interrupt)
  879. ENTRY(threshold_interrupt)
  880. apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
  881. END(threshold_interrupt)
  882. #ifdef CONFIG_SMP
  883. ENTRY(reschedule_interrupt)
  884. apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
  885. END(reschedule_interrupt)
  886. .macro INVALIDATE_ENTRY num
  887. ENTRY(invalidate_interrupt\num)
  888. apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
  889. END(invalidate_interrupt\num)
  890. .endm
  891. INVALIDATE_ENTRY 0
  892. INVALIDATE_ENTRY 1
  893. INVALIDATE_ENTRY 2
  894. INVALIDATE_ENTRY 3
  895. INVALIDATE_ENTRY 4
  896. INVALIDATE_ENTRY 5
  897. INVALIDATE_ENTRY 6
  898. INVALIDATE_ENTRY 7
  899. ENTRY(call_function_interrupt)
  900. apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
  901. END(call_function_interrupt)
  902. ENTRY(call_function_single_interrupt)
  903. apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
  904. END(call_function_single_interrupt)
  905. ENTRY(irq_move_cleanup_interrupt)
  906. apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
  907. END(irq_move_cleanup_interrupt)
  908. #endif
  909. ENTRY(apic_timer_interrupt)
  910. apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
  911. END(apic_timer_interrupt)
  912. ENTRY(uv_bau_message_intr1)
  913. apicinterrupt 220,uv_bau_message_interrupt
  914. END(uv_bau_message_intr1)
  915. ENTRY(error_interrupt)
  916. apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
  917. END(error_interrupt)
  918. ENTRY(spurious_interrupt)
  919. apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
  920. END(spurious_interrupt)
  921. /*
  922. * Exception entry points.
  923. */
  924. .macro zeroentry sym
  925. INTR_FRAME
  926. PARAVIRT_ADJUST_EXCEPTION_FRAME
  927. pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
  928. subq $15*8,%rsp
  929. CFI_ADJUST_CFA_OFFSET 15*8
  930. call error_entry
  931. DEFAULT_FRAME 0
  932. movq %rsp,%rdi /* pt_regs pointer */
  933. xorl %esi,%esi /* no error code */
  934. call \sym
  935. jmp error_exit /* %ebx: no swapgs flag */
  936. CFI_ENDPROC
  937. .endm
  938. .macro errorentry sym
  939. XCPT_FRAME
  940. PARAVIRT_ADJUST_EXCEPTION_FRAME
  941. subq $15*8,%rsp
  942. CFI_ADJUST_CFA_OFFSET 15*8
  943. call error_entry
  944. DEFAULT_FRAME 0
  945. movq %rsp,%rdi /* pt_regs pointer */
  946. movq ORIG_RAX(%rsp),%rsi /* get error code */
  947. movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
  948. call \sym
  949. jmp error_exit /* %ebx: no swapgs flag */
  950. CFI_ENDPROC
  951. .endm
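/*
 * zeroentry is for exceptions that push no error code: it pushes -1 as
 * orig_rax ("no syscall to restart") and hands the handler a zero error
 * code.  errorentry picks up the CPU-supplied error code from the
 * ORIG_RAX slot and only then poisons that slot with -1.  Both build a
 * full pt_regs via error_entry and leave through error_exit.
 */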
  952. /* error code is on the stack already */
  953. .macro paranoidentry sym ist=0
  954. subq $15*8, %rsp
  955. CFI_ADJUST_CFA_OFFSET 15*8
  956. call save_paranoid
  957. DEFAULT_FRAME 0
  958. .if \ist
  959. movq %gs:pda_data_offset, %rbp
  960. .endif
  961. TRACE_IRQS_OFF
  962. movq %rsp,%rdi
  963. movq ORIG_RAX(%rsp),%rsi
  964. movq $-1,ORIG_RAX(%rsp)
  965. .if \ist
  966. subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  967. .endif
  968. call \sym
  969. .if \ist
  970. addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  971. .endif
  972. DISABLE_INTERRUPTS(CLBR_NONE)
  973. TRACE_IRQS_OFF
  974. .endm
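/*
 * For the handlers that run on an IST stack (\ist != 0), the matching
 * TSS IST pointer is moved down by EXCEPTION_STKSZ around the call so
 * that a nested exception of the same kind gets a fresh stack area
 * instead of clobbering the frame still in use; the pointer is restored
 * as soon as the handler returns.
 */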
  975. /*
  976. * "Paranoid" exit path from exception stack.
  977. * Paranoid because this is used by NMIs and cannot take
  978. * any kernel state for granted.
  979. * We don't do kernel preemption checks here, because only
  980. * NMI should be common and it does not enable IRQs and
  981. * cannot get reschedule ticks.
  982. *
  983. * "trace" is 0 for the NMI handler only, because irq-tracing
  984. * is fundamentally NMI-unsafe. (we cannot change the soft and
  985. * hard flags at once, atomically)
  986. */
  987. /* ebx: no swapgs flag */
  988. KPROBE_ENTRY(paranoid_exit)
  989. INTR_FRAME
  990. testl %ebx,%ebx /* swapgs needed? */
  991. jnz paranoid_restore
  992. testl $3,CS(%rsp)
  993. jnz paranoid_userspace
  994. paranoid_swapgs:
  995. TRACE_IRQS_IRETQ 0
  996. SWAPGS_UNSAFE_STACK
  997. paranoid_restore:
  998. RESTORE_ALL 8
  999. jmp irq_return
  1000. paranoid_userspace:
  1001. GET_THREAD_INFO(%rcx)
  1002. movl TI_flags(%rcx),%ebx
  1003. andl $_TIF_WORK_MASK,%ebx
  1004. jz paranoid_swapgs
  1005. movq %rsp,%rdi /* &pt_regs */
  1006. call sync_regs
  1007. movq %rax,%rsp /* switch stack for scheduling */
  1008. testl $_TIF_NEED_RESCHED,%ebx
  1009. jnz paranoid_schedule
  1010. movl %ebx,%edx /* arg3: thread flags */
  1011. TRACE_IRQS_ON
  1012. ENABLE_INTERRUPTS(CLBR_NONE)
  1013. xorl %esi,%esi /* arg2: oldset */
  1014. movq %rsp,%rdi /* arg1: &pt_regs */
  1015. call do_notify_resume
  1016. DISABLE_INTERRUPTS(CLBR_NONE)
  1017. TRACE_IRQS_OFF
  1018. jmp paranoid_userspace
  1019. paranoid_schedule:
  1020. TRACE_IRQS_ON
  1021. ENABLE_INTERRUPTS(CLBR_ANY)
  1022. call schedule
  1023. DISABLE_INTERRUPTS(CLBR_ANY)
  1024. TRACE_IRQS_OFF
  1025. jmp paranoid_userspace
  1026. CFI_ENDPROC
  1027. END(paranoid_exit)
  1028. /*
  1029. * Exception entry point. This expects an error code/orig_rax on the stack
  1030. * and returns the "no swapgs" flag in %ebx.
  1031. */
  1032. KPROBE_ENTRY(error_entry)
  1033. XCPT_FRAME
  1034. CFI_ADJUST_CFA_OFFSET 15*8
  1035. /* oldrax contains error code */
  1036. cld
  1037. movq_cfi rdi, RDI+8
  1038. movq_cfi rsi, RSI+8
  1039. movq_cfi rdx, RDX+8
  1040. movq_cfi rcx, RCX+8
  1041. movq_cfi rax, RAX+8
  1042. movq_cfi r8, R8+8
  1043. movq_cfi r9, R9+8
  1044. movq_cfi r10, R10+8
  1045. movq_cfi r11, R11+8
  1046. movq_cfi rbx, RBX+8
  1047. movq_cfi rbp, RBP+8
  1048. movq_cfi r12, R12+8
  1049. movq_cfi r13, R13+8
  1050. movq_cfi r14, R14+8
  1051. movq_cfi r15, R15+8
  1052. xorl %ebx,%ebx
  1053. testl $3,CS+8(%rsp)
  1054. je error_kernelspace
  1055. error_swapgs:
  1056. SWAPGS
  1057. error_sti:
  1058. TRACE_IRQS_OFF
  1059. ret
  1060. CFI_ENDPROC
  1061. /*
  1062. * There are two places in the kernel that can potentially fault with
  1063. * usergs. Handle them here. The exception handlers after iret run with
  1064. * kernel gs again, so don't set the user space flag. B stepping K8s
  1065. * sometimes report a truncated RIP for IRET exceptions returning to
  1066. * compat mode. Check for these here too.
  1067. */
  1068. error_kernelspace:
  1069. incl %ebx
  1070. leaq irq_return(%rip),%rcx
  1071. cmpq %rcx,RIP+8(%rsp)
  1072. je error_swapgs
  1073. movl %ecx,%ecx /* zero extend */
  1074. cmpq %rcx,RIP+8(%rsp)
  1075. je error_swapgs
  1076. cmpq $gs_change,RIP+8(%rsp)
  1077. je error_swapgs
  1078. jmp error_sti
  1079. KPROBE_END(error_entry)
  1080. /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
  1081. KPROBE_ENTRY(error_exit)
  1082. DEFAULT_FRAME
  1083. movl %ebx,%eax
  1084. RESTORE_REST
  1085. DISABLE_INTERRUPTS(CLBR_NONE)
  1086. TRACE_IRQS_OFF
  1087. GET_THREAD_INFO(%rcx)
  1088. testl %eax,%eax
  1089. jne retint_kernel
  1090. LOCKDEP_SYS_EXIT_IRQ
  1091. movl TI_flags(%rcx),%edx
  1092. movl $_TIF_WORK_MASK,%edi
  1093. andl %edi,%edx
  1094. jnz retint_careful
  1095. jmp retint_swapgs
  1096. CFI_ENDPROC
  1097. KPROBE_END(error_exit)
  1098. /* Reload gs selector with exception handling */
  1099. /* edi: new selector */
  1100. ENTRY(native_load_gs_index)
  1101. CFI_STARTPROC
  1102. pushf
  1103. CFI_ADJUST_CFA_OFFSET 8
  1104. DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
  1105. SWAPGS
  1106. gs_change:
  1107. movl %edi,%gs
  1108. 2: mfence /* workaround */
  1109. SWAPGS
  1110. popf
  1111. CFI_ADJUST_CFA_OFFSET -8
  1112. ret
  1113. CFI_ENDPROC
  1114. ENDPROC(native_load_gs_index)
  1115. .section __ex_table,"a"
  1116. .align 8
  1117. .quad gs_change,bad_gs
  1118. .previous
  1119. .section .fixup,"ax"
  1120. /* running with kernelgs */
  1121. bad_gs:
  1122. SWAPGS /* switch back to user gs */
  1123. xorl %eax,%eax
  1124. movl %eax,%gs
  1125. jmp 2b
  1126. .previous
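/*
 * If the "movl %edi,%gs" above faults because of a bad selector, the
 * __ex_table entry for gs_change redirects execution to bad_gs, which
 * switches back to the user gs, loads a null selector instead and
 * resumes at label 2 as if the load had succeeded.
 */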
  1127. /*
  1128. * Create a kernel thread.
  1129. *
  1130. * C extern interface:
  1131. * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
  1132. *
  1133. * asm input arguments:
  1134. * rdi: fn, rsi: arg, rdx: flags
  1135. */
  1136. ENTRY(kernel_thread)
  1137. CFI_STARTPROC
  1138. FAKE_STACK_FRAME $child_rip
  1139. SAVE_ALL
  1140. # rdi: flags, rsi: usp, rdx: will be &pt_regs
  1141. movq %rdx,%rdi
  1142. orq kernel_thread_flags(%rip),%rdi
  1143. movq $-1, %rsi
  1144. movq %rsp, %rdx
  1145. xorl %r8d,%r8d
  1146. xorl %r9d,%r9d
  1147. # clone now
  1148. call do_fork
  1149. movq %rax,RAX(%rsp)
  1150. xorl %edi,%edi
  1151. /*
  1152. * It isn't worth checking for a reschedule here,
  1153. * so within the x86_64 port you can rely on kernel_thread()
  1154. * not rescheduling the child before returning; this avoids the need
  1155. * for hacks, for example to fork off the per-CPU idle tasks.
  1156. * [Hopefully no generic code relies on the reschedule -AK]
  1157. */
  1158. RESTORE_ALL
  1159. UNFAKE_STACK_FRAME
  1160. ret
  1161. CFI_ENDPROC
  1162. ENDPROC(kernel_thread)
  1163. child_rip:
  1164. pushq $0 # fake return address
  1165. CFI_STARTPROC
  1166. /*
  1167. * Here we are in the child and the registers are set as they were
  1168. * at kernel_thread() invocation in the parent.
  1169. */
  1170. movq %rdi, %rax
  1171. movq %rsi, %rdi
  1172. call *%rax
  1173. # exit
  1174. mov %eax, %edi
  1175. call do_exit
  1176. CFI_ENDPROC
  1177. ENDPROC(child_rip)
  1178. /*
  1179. * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
  1180. *
  1181. * C extern interface:
  1182. * extern long execve(char *name, char **argv, char **envp)
  1183. *
  1184. * asm input arguments:
  1185. * rdi: name, rsi: argv, rdx: envp
  1186. *
  1187. * We want to fall back into:
  1188. * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
  1189. *
  1190. * sys_execve asm fallback arguments:
  1191. * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
  1192. */
  1193. ENTRY(kernel_execve)
  1194. CFI_STARTPROC
  1195. FAKE_STACK_FRAME $0
  1196. SAVE_ALL
  1197. movq %rsp,%rcx
  1198. call sys_execve
  1199. movq %rax, RAX(%rsp)
  1200. RESTORE_REST
  1201. testq %rax,%rax
  1202. je int_ret_from_sys_call
  1203. RESTORE_ARGS
  1204. UNFAKE_STACK_FRAME
  1205. ret
  1206. CFI_ENDPROC
  1207. ENDPROC(kernel_execve)
  1208. KPROBE_ENTRY(page_fault)
  1209. errorentry do_page_fault
  1210. KPROBE_END(page_fault)
  1211. ENTRY(coprocessor_error)
  1212. zeroentry do_coprocessor_error
  1213. END(coprocessor_error)
  1214. ENTRY(simd_coprocessor_error)
  1215. zeroentry do_simd_coprocessor_error
  1216. END(simd_coprocessor_error)
  1217. ENTRY(device_not_available)
  1218. zeroentry do_device_not_available
  1219. END(device_not_available)
  1220. /* runs on exception stack */
  1221. KPROBE_ENTRY(debug)
  1222. INTR_FRAME
  1223. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1224. pushq $0
  1225. CFI_ADJUST_CFA_OFFSET 8
  1226. paranoidentry do_debug, DEBUG_STACK
  1227. jmp paranoid_exit
  1228. CFI_ENDPROC
  1229. KPROBE_END(debug)
  1230. /* runs on exception stack */
  1231. KPROBE_ENTRY(nmi)
  1232. INTR_FRAME
  1233. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1234. pushq_cfi $-1
  1235. subq $15*8, %rsp
  1236. CFI_ADJUST_CFA_OFFSET 15*8
  1237. call save_paranoid
  1238. DEFAULT_FRAME 0
  1239. /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
  1240. movq %rsp,%rdi
  1241. movq ORIG_RAX(%rsp),%rsi
  1242. movq $-1,ORIG_RAX(%rsp)
  1243. call do_nmi
  1244. DISABLE_INTERRUPTS(CLBR_NONE)
  1245. #ifdef CONFIG_TRACE_IRQFLAGS
  1246. /* paranoidexit; without TRACE_IRQS_OFF */
  1247. /* ebx: no swapgs flag */
  1248. nmi_exit:
  1249. testl %ebx,%ebx /* swapgs needed? */
  1250. jnz nmi_restore
  1251. testl $3,CS(%rsp)
  1252. jnz nmi_userspace
  1253. nmi_swapgs:
  1254. SWAPGS_UNSAFE_STACK
  1255. nmi_restore:
  1256. RESTORE_ALL 8
  1257. jmp irq_return
  1258. nmi_userspace:
  1259. GET_THREAD_INFO(%rcx)
  1260. movl TI_flags(%rcx),%ebx
  1261. andl $_TIF_WORK_MASK,%ebx
  1262. jz nmi_swapgs
  1263. movq %rsp,%rdi /* &pt_regs */
  1264. call sync_regs
  1265. movq %rax,%rsp /* switch stack for scheduling */
  1266. testl $_TIF_NEED_RESCHED,%ebx
  1267. jnz nmi_schedule
  1268. movl %ebx,%edx /* arg3: thread flags */
  1269. ENABLE_INTERRUPTS(CLBR_NONE)
  1270. xorl %esi,%esi /* arg2: oldset */
  1271. movq %rsp,%rdi /* arg1: &pt_regs */
  1272. call do_notify_resume
  1273. DISABLE_INTERRUPTS(CLBR_NONE)
  1274. jmp nmi_userspace
  1275. nmi_schedule:
  1276. ENABLE_INTERRUPTS(CLBR_ANY)
  1277. call schedule
  1278. DISABLE_INTERRUPTS(CLBR_ANY)
  1279. jmp nmi_userspace
  1280. CFI_ENDPROC
  1281. #else
  1282. jmp paranoid_exit
  1283. CFI_ENDPROC
  1284. #endif
  1285. KPROBE_END(nmi)
  1286. KPROBE_ENTRY(int3)
  1287. INTR_FRAME
  1288. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1289. pushq $0
  1290. CFI_ADJUST_CFA_OFFSET 8
  1291. paranoidentry do_int3, DEBUG_STACK
  1292. jmp paranoid_exit
  1293. CFI_ENDPROC
  1294. KPROBE_END(int3)
  1295. ENTRY(overflow)
  1296. zeroentry do_overflow
  1297. END(overflow)
  1298. ENTRY(bounds)
  1299. zeroentry do_bounds
  1300. END(bounds)
  1301. ENTRY(invalid_op)
  1302. zeroentry do_invalid_op
  1303. END(invalid_op)
  1304. ENTRY(coprocessor_segment_overrun)
  1305. zeroentry do_coprocessor_segment_overrun
  1306. END(coprocessor_segment_overrun)
  1307. /* runs on exception stack */
  1308. ENTRY(double_fault)
  1309. XCPT_FRAME
  1310. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1311. paranoidentry do_double_fault
  1312. jmp paranoid_exit
  1313. CFI_ENDPROC
  1314. END(double_fault)
  1315. ENTRY(invalid_TSS)
  1316. errorentry do_invalid_TSS
  1317. END(invalid_TSS)
  1318. ENTRY(segment_not_present)
  1319. errorentry do_segment_not_present
  1320. END(segment_not_present)
  1321. /* runs on exception stack */
  1322. ENTRY(stack_segment)
  1323. XCPT_FRAME
  1324. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1325. paranoidentry do_stack_segment
  1326. jmp paranoid_exit
  1327. CFI_ENDPROC
  1328. END(stack_segment)
  1329. KPROBE_ENTRY(general_protection)
  1330. errorentry do_general_protection
  1331. KPROBE_END(general_protection)
  1332. ENTRY(alignment_check)
  1333. errorentry do_alignment_check
  1334. END(alignment_check)
  1335. ENTRY(divide_error)
  1336. zeroentry do_divide_error
  1337. END(divide_error)
  1338. ENTRY(spurious_interrupt_bug)
  1339. zeroentry do_spurious_interrupt_bug
  1340. END(spurious_interrupt_bug)
  1341. #ifdef CONFIG_X86_MCE
  1342. /* runs on exception stack */
  1343. ENTRY(machine_check)
  1344. INTR_FRAME
  1345. PARAVIRT_ADJUST_EXCEPTION_FRAME
  1346. pushq $0
  1347. CFI_ADJUST_CFA_OFFSET 8
  1348. paranoidentry do_machine_check
  1349. jmp paranoid_exit
  1350. CFI_ENDPROC
  1351. END(machine_check)
  1352. #endif
  1353. /* Call softirq on interrupt stack. Interrupts are off. */
  1354. ENTRY(call_softirq)
  1355. CFI_STARTPROC
  1356. push %rbp
  1357. CFI_ADJUST_CFA_OFFSET 8
  1358. CFI_REL_OFFSET rbp,0
  1359. mov %rsp,%rbp
  1360. CFI_DEF_CFA_REGISTER rbp
  1361. incl %gs:pda_irqcount
  1362. cmove %gs:pda_irqstackptr,%rsp
  1363. push %rbp # backlink for old unwinder
  1364. call __do_softirq
  1365. leaveq
  1366. CFI_DEF_CFA_REGISTER rsp
  1367. CFI_ADJUST_CFA_OFFSET -8
  1368. decl %gs:pda_irqcount
  1369. ret
  1370. CFI_ENDPROC
  1371. ENDPROC(call_softirq)
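/*
 * pda_irqcount stays at -1 while we are off the interrupt stack (see the
 * incl/jne pair in save_args), so the cmove above only switches %rsp to
 * pda_irqstackptr on the outermost entry; nested calls keep the current
 * stack, and the decl drops the count again once __do_softirq has
 * returned.
 */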
  1372. KPROBE_ENTRY(ignore_sysret)
  1373. CFI_STARTPROC
  1374. mov $-ENOSYS,%eax
  1375. sysret
  1376. CFI_ENDPROC
  1377. ENDPROC(ignore_sysret)
  1378. #ifdef CONFIG_XEN
  1379. ENTRY(xen_hypervisor_callback)
  1380. zeroentry xen_do_hypervisor_callback
  1381. END(xen_hypervisor_callback)
  1382. /*
  1383. # A note on the "critical region" in our callback handler.
  1384. # We want to avoid stacking callback handlers due to events occurring
  1385. # during handling of the last event. To do this, we keep events disabled
  1386. # until we've done all processing. HOWEVER, we must enable events before
  1387. # popping the stack frame (can't be done atomically) and so it would still
  1388. # be possible to get enough handler activations to overflow the stack.
  1389. # Although unlikely, bugs of that kind are hard to track down, so we'd
  1390. # like to avoid the possibility.
  1391. # So, on entry to the handler we detect whether we interrupted an
  1392. # existing activation in its critical region -- if so, we pop the current
  1393. # activation and restart the handler using the previous one.
  1394. */
  1395. ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
  1396. CFI_STARTPROC
  1397. /* Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
  1398. see the correct pointer to the pt_regs */
  1399. movq %rdi, %rsp # we don't return, adjust the stack frame
  1400. CFI_ENDPROC
  1401. DEFAULT_FRAME
  1402. 11: incl %gs:pda_irqcount
  1403. movq %rsp,%rbp
  1404. CFI_DEF_CFA_REGISTER rbp
  1405. cmovzq %gs:pda_irqstackptr,%rsp
  1406. pushq %rbp # backlink for old unwinder
  1407. call xen_evtchn_do_upcall
  1408. popq %rsp
  1409. CFI_DEF_CFA_REGISTER rsp
  1410. decl %gs:pda_irqcount
  1411. jmp error_exit
  1412. CFI_ENDPROC
  1413. END(do_hypervisor_callback)
  1414. /*
  1415. # Hypervisor uses this for application faults while it executes.
  1416. # We get here for two reasons:
  1417. # 1. Fault while reloading DS, ES, FS or GS
  1418. # 2. Fault while executing IRET
  1419. # Category 1 we do not need to fix up as Xen has already reloaded all segment
  1420. # registers that could be reloaded and zeroed the others.
  1421. # Category 2 we fix up by killing the current process. We cannot use the
  1422. # normal Linux return path in this case because if we use the IRET hypercall
  1423. # to pop the stack frame we end up in an infinite loop of failsafe callbacks.
  1424. # We distinguish between categories by comparing each saved segment register
  1425. # with its current contents: any discrepancy means we are in category 1.
  1426. */
  1427. ENTRY(xen_failsafe_callback)
  1428. INTR_FRAME 1 (6*8)
  1429. /*CFI_REL_OFFSET gs,GS*/
  1430. /*CFI_REL_OFFSET fs,FS*/
  1431. /*CFI_REL_OFFSET es,ES*/
  1432. /*CFI_REL_OFFSET ds,DS*/
  1433. CFI_REL_OFFSET r11,8
  1434. CFI_REL_OFFSET rcx,0
  1435. movw %ds,%cx
  1436. cmpw %cx,0x10(%rsp)
  1437. CFI_REMEMBER_STATE
  1438. jne 1f
  1439. movw %es,%cx
  1440. cmpw %cx,0x18(%rsp)
  1441. jne 1f
  1442. movw %fs,%cx
  1443. cmpw %cx,0x20(%rsp)
  1444. jne 1f
  1445. movw %gs,%cx
  1446. cmpw %cx,0x28(%rsp)
  1447. jne 1f
  1448. /* All segments match their saved values => Category 2 (Bad IRET). */
  1449. movq (%rsp),%rcx
  1450. CFI_RESTORE rcx
  1451. movq 8(%rsp),%r11
  1452. CFI_RESTORE r11
  1453. addq $0x30,%rsp
  1454. CFI_ADJUST_CFA_OFFSET -0x30
  1455. pushq_cfi $0 /* RIP */
  1456. pushq_cfi %r11
  1457. pushq_cfi %rcx
  1458. jmp general_protection
  1459. CFI_RESTORE_STATE
  1460. 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
  1461. movq (%rsp),%rcx
  1462. CFI_RESTORE rcx
  1463. movq 8(%rsp),%r11
  1464. CFI_RESTORE r11
  1465. addq $0x30,%rsp
  1466. CFI_ADJUST_CFA_OFFSET -0x30
  1467. pushq_cfi $0
  1468. SAVE_ALL
  1469. jmp error_exit
  1470. CFI_ENDPROC
  1471. END(xen_failsafe_callback)
  1472. #endif /* CONFIG_XEN */