/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers.
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers
 *   are not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>

        .code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
        bt $9,EFLAGS-\offset(%rsp)      /* interrupts off? */
        jnc 1f
        TRACE_IRQS_ON
1:
#endif
.endm

/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
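/*
 * For example, the PTREGSCALL stubs and stub_execve further down run
 * FIXUP_TOP_OF_STACK %r11 before calling into C, so the pt_regs the C code
 * sees has valid RSP/CS/SS/EFLAGS slots, and RESTORE_TOP_OF_STACK afterwards
 * to propagate any ptregs changes back into the syscall return state.
 */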
/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
        movq %gs:pda_oldrsp,\tmp
        movq \tmp,RSP(%rsp)
        movq $__USER_DS,SS(%rsp)
        movq $__USER_CS,CS(%rsp)
        movq $-1,RCX(%rsp)
        movq R11(%rsp),\tmp     /* get eflags */
        movq \tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
        movq RSP-\offset(%rsp),\tmp
        movq \tmp,%gs:pda_oldrsp
        movq EFLAGS-\offset(%rsp),\tmp
        movq \tmp,R11-\offset(%rsp)
.endm

.macro FAKE_STACK_FRAME child_rip
        /* push in order ss, rsp, eflags, cs, rip */
        xorl %eax, %eax
        pushq %rax              /* ss */
        CFI_ADJUST_CFA_OFFSET 8
        /*CFI_REL_OFFSET ss,0*/
        pushq %rax              /* rsp */
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rsp,0
        pushq $(1<<9)           /* eflags - interrupts on */
        CFI_ADJUST_CFA_OFFSET 8
        /*CFI_REL_OFFSET rflags,0*/
        pushq $__KERNEL_CS      /* cs */
        CFI_ADJUST_CFA_OFFSET 8
        /*CFI_REL_OFFSET cs,0*/
        pushq \child_rip        /* rip */
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rip,0
        pushq %rax              /* orig rax */
        CFI_ADJUST_CFA_OFFSET 8
.endm

.macro UNFAKE_STACK_FRAME
        addq $8*6, %rsp
        CFI_ADJUST_CFA_OFFSET -(6*8)
.endm

.macro CFI_DEFAULT_STACK start=1
        .if \start
        CFI_STARTPROC simple
        CFI_DEF_CFA rsp,SS+8
        .else
        CFI_DEF_CFA_OFFSET SS+8
        .endif
        CFI_REL_OFFSET r15,R15
        CFI_REL_OFFSET r14,R14
        CFI_REL_OFFSET r13,R13
        CFI_REL_OFFSET r12,R12
        CFI_REL_OFFSET rbp,RBP
        CFI_REL_OFFSET rbx,RBX
        CFI_REL_OFFSET r11,R11
        CFI_REL_OFFSET r10,R10
        CFI_REL_OFFSET r9,R9
        CFI_REL_OFFSET r8,R8
        CFI_REL_OFFSET rax,RAX
        CFI_REL_OFFSET rcx,RCX
        CFI_REL_OFFSET rdx,RDX
        CFI_REL_OFFSET rsi,RSI
        CFI_REL_OFFSET rdi,RDI
        CFI_REL_OFFSET rip,RIP
        /*CFI_REL_OFFSET cs,CS*/
        /*CFI_REL_OFFSET rflags,EFLAGS*/
        CFI_REL_OFFSET rsp,RSP
        /*CFI_REL_OFFSET ss,SS*/
.endm

/*
 * A newly forked process directly context switches into this.
 */
/* rdi: prev */
ENTRY(ret_from_fork)
        CFI_DEFAULT_STACK
        call schedule_tail
        GET_THREAD_INFO(%rcx)
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
        jnz rff_trace
rff_action:
        RESTORE_REST
        testl $3,CS-ARGOFFSET(%rsp)     # from kernel_thread?
        je int_ret_from_sys_call
        testl $_TIF_IA32,threadinfo_flags(%rcx)
        jnz int_ret_from_sys_call
        RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
        jmp ret_from_sys_call
rff_trace:
        movq %rsp,%rdi
        call syscall_trace_leave
        GET_THREAD_INFO(%rcx)
        jmp rff_action
        CFI_ENDPROC
END(ret_from_fork)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3 (--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX if we had a free scratch register we could save the RSP into the
 * stack frame and report it properly in ps. Unfortunately we don't have one.
 *
 * When the user can change the frame, always force IRET. That is because
 * it deals with non-canonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
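/*
 * For example, a four-argument syscall arrives with arg0..arg3 in
 * rdi/rsi/rdx/r10; the "movq %r10,%rcx" in the dispatch below re-creates
 * the normal C calling convention (rdi/rsi/rdx/rcx) before indirecting
 * through sys_call_table, since SYSCALL itself clobbered rcx with the
 * user return address.
 */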
ENTRY(system_call)
        CFI_STARTPROC simple
        CFI_DEF_CFA rsp,PDA_STACKOFFSET
        CFI_REGISTER rip,rcx
        /*CFI_REGISTER rflags,r11*/
        swapgs
        movq %rsp,%gs:pda_oldrsp
        movq %gs:pda_kernelstack,%rsp
        /*
         * No need to follow this irqs off/on section - it's straight
         * and short:
         */
        sti
        SAVE_ARGS 8,1
        movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
        movq %rcx,RIP-ARGOFFSET(%rsp)
        CFI_REL_OFFSET rip,RIP-ARGOFFSET
        GET_THREAD_INFO(%rcx)
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
        CFI_REMEMBER_STATE
        jnz tracesys
        cmpq $__NR_syscall_max,%rax
        ja badsys
        movq %r10,%rcx
        call *sys_call_table(,%rax,8)   # XXX: rip relative
        movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path).
 * Has incomplete stack frame and undefined top of stack.
 */
        .globl ret_from_sys_call
ret_from_sys_call:
        movl $_TIF_ALLWORK_MASK,%edi
        /* edi: flagmask */
sysret_check:
        GET_THREAD_INFO(%rcx)
        cli
        TRACE_IRQS_OFF
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        CFI_REMEMBER_STATE
        jnz sysret_careful
        /*
         * sysretq will re-enable interrupts:
         */
        TRACE_IRQS_ON
        movq RIP-ARGOFFSET(%rsp),%rcx
        CFI_REGISTER rip,rcx
        RESTORE_ARGS 0,-ARG_SKIP,1
        /*CFI_REGISTER rflags,r11*/
        movq %gs:pda_oldrsp,%rsp
        swapgs
        sysretq

        /* Handle reschedules */
        /* edx: work, edi: workmask */
sysret_careful:
        CFI_RESTORE_STATE
        bt $TIF_NEED_RESCHED,%edx
        jnc sysret_signal
        TRACE_IRQS_ON
        sti
        pushq %rdi
        CFI_ADJUST_CFA_OFFSET 8
        call schedule
        popq %rdi
        CFI_ADJUST_CFA_OFFSET -8
        jmp sysret_check

        /* Handle a signal */
sysret_signal:
        TRACE_IRQS_ON
        sti
        testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        jz 1f
        /* Really a signal */
        /* edx: work flags (arg3) */
        leaq do_notify_resume(%rip),%rax
        leaq -ARGOFFSET(%rsp),%rdi      # &pt_regs -> arg1
        xorl %esi,%esi                  # oldset -> arg2
        call ptregscall_common
1:      movl $_TIF_NEED_RESCHED,%edi
        /* Use IRET because the user could have changed the frame. This
           works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
        cli
        TRACE_IRQS_OFF
        jmp int_with_check

badsys:
        movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
        jmp ret_from_sys_call

        /* Do syscall tracing */
tracesys:
        CFI_RESTORE_STATE
        SAVE_REST
        movq $-ENOSYS,RAX(%rsp)
        FIXUP_TOP_OF_STACK %rdi
        movq %rsp,%rdi
        call syscall_trace_enter
        LOAD_ARGS ARGOFFSET     /* reload args from stack in case ptrace changed them */
        RESTORE_REST
        cmpq $__NR_syscall_max,%rax
        ja 1f
        movq %r10,%rcx          /* fixup for C */
        call *sys_call_table(,%rax,8)
1:      movq %rax,RAX-ARGOFFSET(%rsp)
        /* Use IRET because the user could have changed the frame */
        jmp int_ret_from_sys_call
        CFI_ENDPROC
END(system_call)

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
        CFI_STARTPROC simple
        CFI_DEF_CFA rsp,SS+8-ARGOFFSET
        /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
        CFI_REL_OFFSET rsp,RSP-ARGOFFSET
        /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
        /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
        CFI_REL_OFFSET rip,RIP-ARGOFFSET
        CFI_REL_OFFSET rdx,RDX-ARGOFFSET
        CFI_REL_OFFSET rcx,RCX-ARGOFFSET
        CFI_REL_OFFSET rax,RAX-ARGOFFSET
        CFI_REL_OFFSET rdi,RDI-ARGOFFSET
        CFI_REL_OFFSET rsi,RSI-ARGOFFSET
        CFI_REL_OFFSET r8,R8-ARGOFFSET
        CFI_REL_OFFSET r9,R9-ARGOFFSET
        CFI_REL_OFFSET r10,R10-ARGOFFSET
        CFI_REL_OFFSET r11,R11-ARGOFFSET
        cli
        TRACE_IRQS_OFF
        testl $3,CS-ARGOFFSET(%rsp)
        je retint_restore_args
        movl $_TIF_ALLWORK_MASK,%edi
        /* edi: mask to check */
int_with_check:
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        jnz int_careful
        andl $~TS_COMPAT,threadinfo_status(%rcx)
        jmp retint_swapgs

        /* Either reschedule or signal or syscall exit tracking needed. */
        /* First do a reschedule test. */
        /* edx: work, edi: workmask */
int_careful:
        bt $TIF_NEED_RESCHED,%edx
        jnc int_very_careful
        TRACE_IRQS_ON
        sti
        pushq %rdi
        CFI_ADJUST_CFA_OFFSET 8
        call schedule
        popq %rdi
        CFI_ADJUST_CFA_OFFSET -8
        cli
        TRACE_IRQS_OFF
        jmp int_with_check

        /* handle signals and tracing -- both require a full stack frame */
int_very_careful:
        TRACE_IRQS_ON
        sti
        SAVE_REST
        /* Check for syscall exit trace */
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
        jz int_signal
        pushq %rdi
        CFI_ADJUST_CFA_OFFSET 8
        leaq 8(%rsp),%rdi       # &ptregs -> arg1
        call syscall_trace_leave
        popq %rdi
        CFI_ADJUST_CFA_OFFSET -8
        andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
        cli
        TRACE_IRQS_OFF
        jmp int_restore_rest

int_signal:
        testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
        jz 1f
        movq %rsp,%rdi          # &ptregs -> arg1
        xorl %esi,%esi          # oldset -> arg2
        call do_notify_resume
1:      movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
        RESTORE_REST
        cli
        TRACE_IRQS_OFF
        jmp int_with_check
        CFI_ENDPROC
END(int_ret_from_sys_call)

/*
 * Certain special system calls need to save a complete full stack frame.
 */
.macro PTREGSCALL label,func,arg
        .globl \label
\label:
        leaq \func(%rip),%rax
        leaq -ARGOFFSET+8(%rsp),\arg    /* 8 for return address */
        jmp ptregscall_common
END(\label)
.endm
        CFI_STARTPROC

        PTREGSCALL stub_clone, sys_clone, %r8
        PTREGSCALL stub_fork, sys_fork, %rdi
        PTREGSCALL stub_vfork, sys_vfork, %rdi
        PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
        PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
        PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
        popq %r11
        CFI_ADJUST_CFA_OFFSET -8
        CFI_REGISTER rip, r11
        SAVE_REST
        movq %r11, %r15
        CFI_REGISTER rip, r15
        FIXUP_TOP_OF_STACK %r11
        call *%rax
        RESTORE_TOP_OF_STACK %r11
        movq %r15, %r11
        CFI_REGISTER rip, r11
        RESTORE_REST
        pushq %r11
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rip, 0
        ret
        CFI_ENDPROC
END(ptregscall_common)

ENTRY(stub_execve)
        CFI_STARTPROC
        popq %r11
        CFI_ADJUST_CFA_OFFSET -8
        CFI_REGISTER rip, r11
        SAVE_REST
        FIXUP_TOP_OF_STACK %r11
        call sys_execve
        RESTORE_TOP_OF_STACK %r11
        movq %rax,RAX(%rsp)
        RESTORE_REST
        jmp int_ret_from_sys_call
        CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
        CFI_STARTPROC
        addq $8, %rsp
        CFI_ADJUST_CFA_OFFSET -8
        SAVE_REST
        movq %rsp,%rdi
        FIXUP_TOP_OF_STACK %r11
        call sys_rt_sigreturn
        movq %rax,RAX(%rsp)     # fixme, this could be done at the higher layer
        RESTORE_REST
        jmp int_ret_from_sys_call
        CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * initial frame state for interrupts and exceptions
 */
.macro _frame ref
        CFI_STARTPROC simple
        CFI_DEF_CFA rsp,SS+8-\ref
        /*CFI_REL_OFFSET ss,SS-\ref*/
        CFI_REL_OFFSET rsp,RSP-\ref
        /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
        /*CFI_REL_OFFSET cs,CS-\ref*/
        CFI_REL_OFFSET rip,RIP-\ref
.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */
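/*
 * The interrupt macro below also switches to the per-CPU interrupt stack:
 * the cmoveq replaces %rsp with pda_irqstackptr only when the preceding
 * incl left pda_irqcount at zero, i.e. for the outermost interrupt; nested
 * interrupts keep running on the stack they arrived on. The old %rbp is
 * pushed as a backlink for the old unwinder, and ret_from_intr undoes the
 * switch with leaveq after decrementing pda_irqcount.
 */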
/* 0(%rsp): interrupt number */
.macro interrupt func
        cld
        SAVE_ARGS
        leaq -ARGOFFSET(%rsp),%rdi      # arg1 for handler
        pushq %rbp
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rbp, 0
        movq %rsp,%rbp
        CFI_DEF_CFA_REGISTER rbp
        testl $3,CS(%rdi)
        je 1f
        swapgs
1:      incl %gs:pda_irqcount   # RED-PEN should check preempt count
        cmoveq %gs:pda_irqstackptr,%rsp
        push %rbp               # backlink for old unwinder
        /*
         * We entered an interrupt context - irqs are off:
         */
        TRACE_IRQS_OFF
        call \func
.endm

ENTRY(common_interrupt)
        XCPT_FRAME
        interrupt do_IRQ
        /* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
        cli
        TRACE_IRQS_OFF
        decl %gs:pda_irqcount
        leaveq
        CFI_DEF_CFA_REGISTER rsp
        CFI_ADJUST_CFA_OFFSET -8
exit_intr:
        GET_THREAD_INFO(%rcx)
        testl $3,CS-ARGOFFSET(%rsp)
        je retint_kernel

        /* Interrupt came from user space */
        /*
         * Has a correct top of stack, but a partial stack frame.
         * %rcx: thread info. Interrupts off.
         */
retint_with_reschedule:
        movl $_TIF_WORK_MASK,%edi
retint_check:
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        CFI_REMEMBER_STATE
        jnz retint_careful
retint_swapgs:
        /*
         * The iretq could re-enable interrupts:
         */
        cli
        TRACE_IRQS_IRETQ
        swapgs
        jmp restore_args

retint_restore_args:
        cli
        /*
         * The iretq could re-enable interrupts:
         */
        TRACE_IRQS_IRETQ
restore_args:
        RESTORE_ARGS 0,8,0
iret_label:
        iretq

        .section __ex_table,"a"
        .quad iret_label,bad_iret
        .previous
        .section .fixup,"ax"
        /* force a signal here? this matches i386 behaviour */
        /* running with kernel gs */
bad_iret:
        movq $11,%rdi           /* SIGSEGV */
        TRACE_IRQS_ON
        sti
        jmp do_exit
        .previous

        /* edi: workmask, edx: work */
retint_careful:
        CFI_RESTORE_STATE
        bt $TIF_NEED_RESCHED,%edx
        jnc retint_signal
        TRACE_IRQS_ON
        sti
        pushq %rdi
        CFI_ADJUST_CFA_OFFSET 8
        call schedule
        popq %rdi
        CFI_ADJUST_CFA_OFFSET -8
        GET_THREAD_INFO(%rcx)
        cli
        TRACE_IRQS_OFF
        jmp retint_check

retint_signal:
        testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        jz retint_swapgs
        TRACE_IRQS_ON
        sti
        SAVE_REST
        movq $-1,ORIG_RAX(%rsp)
        xorl %esi,%esi          # oldset
        movq %rsp,%rdi          # &pt_regs
        call do_notify_resume
        RESTORE_REST
        cli
        TRACE_IRQS_OFF
        movl $_TIF_NEED_RESCHED,%edi
        GET_THREAD_INFO(%rcx)
        jmp retint_check

#ifdef CONFIG_PREEMPT
        /* Returning to kernel space. Check if we need preemption. */
        /* rcx: threadinfo. interrupts off. */
ENTRY(retint_kernel)
        cmpl $0,threadinfo_preempt_count(%rcx)
        jnz retint_restore_args
        bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
        jnc retint_restore_args
        bt $9,EFLAGS-ARGOFFSET(%rsp)    /* interrupts off? */
        jnc retint_restore_args
        call preempt_schedule_irq
        jmp exit_intr
#endif
        CFI_ENDPROC
END(common_interrupt)

/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
        INTR_FRAME
        pushq $~(\num)
        CFI_ADJUST_CFA_OFFSET 8
        interrupt \func
        jmp ret_from_intr
        CFI_ENDPROC
.endm

ENTRY(thermal_interrupt)
        apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
        apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
        apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
        apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
.endm

INVALIDATE_ENTRY 0
INVALIDATE_ENTRY 1
INVALIDATE_ENTRY 2
INVALIDATE_ENTRY 3
INVALIDATE_ENTRY 4
INVALIDATE_ENTRY 5
INVALIDATE_ENTRY 6
INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
        apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
#endif

ENTRY(apic_timer_interrupt)
        apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(error_interrupt)
        apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
        apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)

/*
 * Exception entry points.
 */
.macro zeroentry sym
        INTR_FRAME
        pushq $0                /* push error code/oldrax */
        CFI_ADJUST_CFA_OFFSET 8
        pushq %rax              /* push real oldrax to the rdi slot */
        CFI_ADJUST_CFA_OFFSET 8
        leaq \sym(%rip),%rax
        jmp error_entry
        CFI_ENDPROC
.endm

.macro errorentry sym
        XCPT_FRAME
        pushq %rax
        CFI_ADJUST_CFA_OFFSET 8
        leaq \sym(%rip),%rax
        jmp error_entry
        CFI_ENDPROC
.endm

/* error code is on the stack already */
/* handle NMI-like exceptions that can happen everywhere */
.macro paranoidentry sym, ist=0, irqtrace=1
        SAVE_ALL
        cld
        movl $1,%ebx
        movl $MSR_GS_BASE,%ecx
        rdmsr
        testl %edx,%edx
        js 1f
        swapgs
        xorl %ebx,%ebx
1:
        .if \ist
        movq %gs:pda_data_offset, %rbp
        .endif
        movq %rsp,%rdi
        movq ORIG_RAX(%rsp),%rsi
        movq $-1,ORIG_RAX(%rsp)
        .if \ist
        subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
        .endif
        call \sym
        .if \ist
        addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
        .endif
        cli
        .if \irqtrace
        TRACE_IRQS_OFF
        .endif
.endm

/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common, and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard flags at once, atomically)
 */
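/*
 * Convention shared by paranoidentry above and this exit path:
 * %ebx == 1 means the entry found GS already pointing at kernel data and
 * did not swapgs, so the exit must not swapgs either; %ebx == 0 means the
 * entry swapped gs and the exit has to swap it back before iretq.
 */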
.macro paranoidexit trace=1
        /* ebx: no swapgs flag */
paranoid_exit\trace:
        testl %ebx,%ebx                 /* swapgs needed? */
        jnz paranoid_restore\trace
        testl $3,CS(%rsp)
        jnz paranoid_userspace\trace
paranoid_swapgs\trace:
        TRACE_IRQS_IRETQ 0
        swapgs
paranoid_restore\trace:
        RESTORE_ALL 8
        iretq
paranoid_userspace\trace:
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%ebx
        andl $_TIF_WORK_MASK,%ebx
        jz paranoid_swapgs\trace
        movq %rsp,%rdi                  /* &pt_regs */
        call sync_regs
        movq %rax,%rsp                  /* switch stack for scheduling */
        testl $_TIF_NEED_RESCHED,%ebx
        jnz paranoid_schedule\trace
        movl %ebx,%edx                  /* arg3: thread flags */
        .if \trace
        TRACE_IRQS_ON
        .endif
        sti
        xorl %esi,%esi                  /* arg2: oldset */
        movq %rsp,%rdi                  /* arg1: &pt_regs */
        call do_notify_resume
        cli
        .if \trace
        TRACE_IRQS_OFF
        .endif
        jmp paranoid_userspace\trace
paranoid_schedule\trace:
        .if \trace
        TRACE_IRQS_ON
        .endif
        sti
        call schedule
        cli
        .if \trace
        TRACE_IRQS_OFF
        .endif
        jmp paranoid_userspace\trace
        CFI_ENDPROC
.endm

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
KPROBE_ENTRY(error_entry)
        _frame RDI
        /* rdi slot contains rax, oldrax contains error code */
        cld
        subq $14*8,%rsp
        CFI_ADJUST_CFA_OFFSET (14*8)
        movq %rsi,13*8(%rsp)
        CFI_REL_OFFSET rsi,RSI
        movq 14*8(%rsp),%rsi    /* load rax from rdi slot */
        movq %rdx,12*8(%rsp)
        CFI_REL_OFFSET rdx,RDX
        movq %rcx,11*8(%rsp)
        CFI_REL_OFFSET rcx,RCX
        movq %rsi,10*8(%rsp)    /* store rax */
        CFI_REL_OFFSET rax,RAX
        movq %r8, 9*8(%rsp)
        CFI_REL_OFFSET r8,R8
        movq %r9, 8*8(%rsp)
        CFI_REL_OFFSET r9,R9
        movq %r10,7*8(%rsp)
        CFI_REL_OFFSET r10,R10
        movq %r11,6*8(%rsp)
        CFI_REL_OFFSET r11,R11
        movq %rbx,5*8(%rsp)
        CFI_REL_OFFSET rbx,RBX
        movq %rbp,4*8(%rsp)
        CFI_REL_OFFSET rbp,RBP
        movq %r12,3*8(%rsp)
        CFI_REL_OFFSET r12,R12
        movq %r13,2*8(%rsp)
        CFI_REL_OFFSET r13,R13
        movq %r14,1*8(%rsp)
        CFI_REL_OFFSET r14,R14
        movq %r15,(%rsp)
        CFI_REL_OFFSET r15,R15
        xorl %ebx,%ebx
        testl $3,CS(%rsp)
        je error_kernelspace
error_swapgs:
        swapgs
error_sti:
        movq %rdi,RDI(%rsp)
        movq %rsp,%rdi
        movq ORIG_RAX(%rsp),%rsi        /* get error code */
        movq $-1,ORIG_RAX(%rsp)
        call *%rax
        /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
        movl %ebx,%eax
        RESTORE_REST
        cli
        TRACE_IRQS_OFF
        GET_THREAD_INFO(%rcx)
        testl %eax,%eax
        jne retint_kernel
        movl threadinfo_flags(%rcx),%edx
        movl $_TIF_WORK_MASK,%edi
        andl %edi,%edx
        jnz retint_careful
        /*
         * The iret might restore flags:
         */
        TRACE_IRQS_IRETQ
        swapgs
        RESTORE_ARGS 0,8,0
        jmp iret_label
        CFI_ENDPROC

error_kernelspace:
        incl %ebx
        /* There are two places in the kernel that can potentially fault with
           usergs. Handle them here. The exception handlers after
           iret run with kernel gs again, so don't set the user space flag.
           B stepping K8s sometimes report a truncated RIP for IRET
           exceptions returning to compat mode. Check for these here too. */
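        /* The compares below cover, in order: the iretq at iret_label
           itself, the same RIP with its upper half truncated (the K8
           erratum above, hence the zero-extend of %ebp before the second
           compare), and the "movl %edi,%gs" at gs_change in
           load_gs_index. */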
        leaq iret_label(%rip),%rbp
        cmpq %rbp,RIP(%rsp)
        je error_swapgs
        movl %ebp,%ebp          /* zero extend */
        cmpq %rbp,RIP(%rsp)
        je error_swapgs
        cmpq $gs_change,RIP(%rsp)
        je error_swapgs
        jmp error_sti
KPROBE_END(error_entry)

/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(load_gs_index)
        CFI_STARTPROC
        pushf
        CFI_ADJUST_CFA_OFFSET 8
        cli
        swapgs
gs_change:
        movl %edi,%gs
2:      mfence                  /* workaround */
        swapgs
        popf
        CFI_ADJUST_CFA_OFFSET -8
        ret
        CFI_ENDPROC
ENDPROC(load_gs_index)

        .section __ex_table,"a"
        .align 8
        .quad gs_change,bad_gs
        .previous
        .section .fixup,"ax"
        /* running with kernelgs */
bad_gs:
        swapgs                  /* switch back to user gs */
        xorl %eax,%eax
        movl %eax,%gs
        jmp 2b
        .previous

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *      extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *      rdi: fn, rsi: arg, rdx: flags
 */
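/*
 * Illustrative (hypothetical) C-side use of the interface above -- the
 * names and flags are made up for the example, only kernel_thread()
 * itself is real:
 *
 *      static int worker_fn(void *arg)
 *      {
 *              return 0;       runs in the new thread, via child_rip below
 *      }
 *
 *      pid = kernel_thread(worker_fn, NULL, CLONE_FS | CLONE_FILES);
 *
 * The new thread starts at child_rip, which calls fn(arg) and then do_exit(0),
 * ignoring the function's return value.
 */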
ENTRY(kernel_thread)
        CFI_STARTPROC
        FAKE_STACK_FRAME $child_rip
        SAVE_ALL

        # rdi: flags, rsi: usp, rdx: will be &pt_regs
        movq %rdx,%rdi
        orq  kernel_thread_flags(%rip),%rdi
        movq $-1, %rsi
        movq %rsp, %rdx

        xorl %r8d,%r8d
        xorl %r9d,%r9d

        # clone now
        call do_fork
        movq %rax,RAX(%rsp)
        xorl %edi,%edi

        /*
         * It isn't worth checking for a reschedule here,
         * so internally to the x86_64 port you can rely on kernel_thread()
         * not rescheduling the child before returning; this avoids the need
         * for hacks, for example to fork off the per-CPU idle tasks.
         * [Hopefully no generic code relies on the reschedule -AK]
         */
        RESTORE_ALL
        UNFAKE_STACK_FRAME
        ret
        CFI_ENDPROC
ENDPROC(kernel_thread)

child_rip:
        pushq $0                # fake return address
        CFI_STARTPROC
        /*
         * Here we are in the child and the registers are set as they were
         * at kernel_thread() invocation in the parent.
         */
        movq %rdi, %rax
        movq %rsi, %rdi
        call *%rax
        # exit
        xorl %edi, %edi
        call do_exit
        CFI_ENDPROC
ENDPROC(child_rip)

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *      extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *      rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *      extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *      rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
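/*
 * Illustrative (hypothetical) in-kernel caller of the wrapper above -- the
 * path and argument arrays are made up for the example:
 *
 *      static char *argv[] = { "/sbin/init", NULL };
 *      static char *envp[] = { "HOME=/", NULL };
 *      execve("/sbin/init", argv, envp);
 *
 * On success sys_execve returns 0 and the new user context set up in the
 * fake pt_regs frame is entered through int_ret_from_sys_call (hence the
 * IRET requirement); on failure the fake frame is unwound and the error is
 * returned to the caller.
 */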
ENTRY(execve)
        CFI_STARTPROC
        FAKE_STACK_FRAME $0
        SAVE_ALL
        call sys_execve
        movq %rax, RAX(%rsp)
        RESTORE_REST
        testq %rax,%rax
        je int_ret_from_sys_call
        RESTORE_ARGS
        UNFAKE_STACK_FRAME
        ret
        CFI_ENDPROC
ENDPROC(execve)

KPROBE_ENTRY(page_fault)
        errorentry do_page_fault
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
        zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
        zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
        zeroentry math_state_restore
END(device_not_available)

        /* runs on exception stack */
KPROBE_ENTRY(debug)
        INTR_FRAME
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_debug, DEBUG_STACK
        paranoidexit
KPROBE_END(debug)

        /* runs on exception stack */
KPROBE_ENTRY(nmi)
        INTR_FRAME
        pushq $-1
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
        paranoidexit 0
#else
        jmp paranoid_exit1
        CFI_ENDPROC
#endif
KPROBE_END(nmi)

KPROBE_ENTRY(int3)
        INTR_FRAME
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_int3, DEBUG_STACK
        jmp paranoid_exit1
        CFI_ENDPROC
KPROBE_END(int3)

ENTRY(overflow)
        zeroentry do_overflow
END(overflow)

ENTRY(bounds)
        zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
        zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
        zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

ENTRY(reserved)
        zeroentry do_reserved
END(reserved)

        /* runs on exception stack */
ENTRY(double_fault)
        XCPT_FRAME
        paranoidentry do_double_fault
        jmp paranoid_exit1
        CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
        errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
        errorentry do_segment_not_present
END(segment_not_present)

        /* runs on exception stack */
ENTRY(stack_segment)
        XCPT_FRAME
        paranoidentry do_stack_segment
        jmp paranoid_exit1
        CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
        errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
        errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
        zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
        zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
        /* runs on exception stack */
ENTRY(machine_check)
        INTR_FRAME
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_machine_check
        jmp paranoid_exit1
        CFI_ENDPROC
END(machine_check)
#endif

/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
        CFI_STARTPROC
        push %rbp
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rbp,0
        mov  %rsp,%rbp
        CFI_DEF_CFA_REGISTER rbp
        incl %gs:pda_irqcount
        cmove %gs:pda_irqstackptr,%rsp
        push %rbp               # backlink for old unwinder
        call __do_softirq
        leaveq
        CFI_DEF_CFA_REGISTER rsp
        CFI_ADJUST_CFA_OFFSET -8
        decl %gs:pda_irqcount
        ret
        CFI_ENDPROC
ENDPROC(call_softirq)

#ifdef CONFIG_STACK_UNWIND
ENTRY(arch_unwind_init_running)
        CFI_STARTPROC
        movq %r15, R15(%rdi)
        movq %r14, R14(%rdi)
        xchgq %rsi, %rdx
        movq %r13, R13(%rdi)
        movq %r12, R12(%rdi)
        xorl %eax, %eax
        movq %rbp, RBP(%rdi)
        movq %rbx, RBX(%rdi)
        movq (%rsp), %rcx
        movq %rax, R11(%rdi)
        movq %rax, R10(%rdi)
        movq %rax, R9(%rdi)
        movq %rax, R8(%rdi)
        movq %rax, RAX(%rdi)
        movq %rax, RCX(%rdi)
        movq %rax, RDX(%rdi)
        movq %rax, RSI(%rdi)
        movq %rax, RDI(%rdi)
        movq %rax, ORIG_RAX(%rdi)
        movq %rcx, RIP(%rdi)
        leaq 8(%rsp), %rcx
        movq $__KERNEL_CS, CS(%rdi)
        movq %rax, EFLAGS(%rdi)
        movq %rcx, RSP(%rdi)
        movq $__KERNEL_DS, SS(%rdi)
        jmpq *%rdx
        CFI_ENDPROC
ENDPROC(arch_unwind_init_running)
#endif