/* arch/i386/kernel/paravirt.c */
/*  Paravirtualization interfaces
    Copyright (C) 2006 Rusty Russell IBM Corporation

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*/
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/efi.h>
#include <linux/bcd.h>
#include <linux/start_kernel.h>

#include <asm/bug.h>
#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/arch_hooks.h>
#include <asm/time.h>
#include <asm/irq.h>
#include <asm/delay.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
#include <asm/tlbflush.h>
/* nop stub: default no-op implementation for optional paravirt hooks
 * (e.g. arch_setup, pte_update below) that native hardware doesn't need. */
static void native_nop(void)
{
}
/* Default .banner hook: announce at boot which paravirt backend is active
 * (paravirt_ops.name is "bare hardware" for the native ops below). */
static void __init default_banner(void)
{
	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
	       paravirt_ops.name);
}
/* Dispatch memory-map setup through the active paravirt backend.
 * Returns the name of the memory-map source (for boot messages). */
char *memory_setup(void)
{
	return paravirt_ops.memory_setup();
}
/* Simple instruction patching code. */
/* Emit the native instruction sequence for `name` into the kernel image,
 * bracketed by start_<name>/end_<name> labels so its length can be measured
 * at patch time. */
#define DEF_NATIVE(name, code) \
	extern const char start_##name[], end_##name[]; \
	asm("start_" #name ": " code "; end_" #name ":")

DEF_NATIVE(cli, "cli");
DEF_NATIVE(sti, "sti");
DEF_NATIVE(popf, "push %eax; popf");
DEF_NATIVE(pushf, "pushf; pop %eax");
DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli");
DEF_NATIVE(iret, "iret");
DEF_NATIVE(sti_sysexit, "sti; sysexit");

/* Patch-site replacement table, indexed by PARAVIRT_* call type.
 * Entries not listed here are left as indirect calls. */
static const struct native_insns
{
	const char *start, *end;
} native_insns[] = {
	[PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli },
	[PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti },
	[PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf },
	[PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf },
	[PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli },
	[PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret },
	[PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit },
};
  67. static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
  68. {
  69. unsigned int insn_len;
  70. /* Don't touch it if we don't have a replacement */
  71. if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start)
  72. return len;
  73. insn_len = native_insns[type].end - native_insns[type].start;
  74. /* Similarly if we can't fit replacement. */
  75. if (len < insn_len)
  76. return len;
  77. memcpy(insns, native_insns[type].start, insn_len);
  78. return insn_len;
  79. }
/* Read hardware debug register %dbN.  Only DR0-DR3 (breakpoint addresses),
 * DR6 (status) and DR7 (control) exist; DR4/DR5 are aliases and anything
 * else is a caller bug.  The mov-from-db form requires a register operand,
 * hence the per-register switch. */
static fastcall unsigned long native_get_debugreg(int regno)
{
	unsigned long val = 0; 	/* Damn you, gcc! */
	switch (regno) {
	case 0:
		asm("movl %%db0, %0" :"=r" (val)); break;
	case 1:
		asm("movl %%db1, %0" :"=r" (val)); break;
	case 2:
		asm("movl %%db2, %0" :"=r" (val)); break;
	case 3:
		asm("movl %%db3, %0" :"=r" (val)); break;
	case 6:
		asm("movl %%db6, %0" :"=r" (val)); break;
	case 7:
		asm("movl %%db7, %0" :"=r" (val)); break;
	default:
		BUG();
	}
	return val;
}
/* Write hardware debug register %dbN; mirror of native_get_debugreg().
 * Valid registers are DR0-DR3, DR6 and DR7; any other index is a bug. */
static fastcall void native_set_debugreg(int regno, unsigned long value)
{
	switch (regno) {
	case 0:
		asm("movl %0,%%db0"	: /* no output */ :"r" (value));
		break;
	case 1:
		asm("movl %0,%%db1"	: /* no output */ :"r" (value));
		break;
	case 2:
		asm("movl %0,%%db2"	: /* no output */ :"r" (value));
		break;
	case 3:
		asm("movl %0,%%db3"	: /* no output */ :"r" (value));
		break;
	case 6:
		asm("movl %0,%%db6"	: /* no output */ :"r" (value));
		break;
	case 7:
		asm("movl %0,%%db7"	: /* no output */ :"r" (value));
		break;
	default:
		BUG();
	}
}
/* Dispatch interrupt-controller initialization through the active backend
 * (native_init_IRQ on bare hardware, see paravirt_ops below). */
void init_IRQ(void)
{
	paravirt_ops.init_IRQ();
}
/* Clear the TS (task-switched) flag in CR0. */
static fastcall void native_clts(void)
{
	asm volatile ("clts");
}

/* Control-register accessors: thin wrappers around the mov to/from %crN
 * instructions so they can be replaced by a hypervisor backend. */
static fastcall unsigned long native_read_cr0(void)
{
	unsigned long val;
	asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
	return val;
}

static fastcall void native_write_cr0(unsigned long val)
{
	asm volatile("movl %0,%%cr0": :"r" (val));
}

/* CR2 holds the faulting address after a page fault. */
static fastcall unsigned long native_read_cr2(void)
{
	unsigned long val;
	asm volatile("movl %%cr2,%0\n\t" :"=r" (val));
	return val;
}

static fastcall void native_write_cr2(unsigned long val)
{
	asm volatile("movl %0,%%cr2": :"r" (val));
}

/* CR3 is the page-directory base register. */
static fastcall unsigned long native_read_cr3(void)
{
	unsigned long val;
	asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
	return val;
}

static fastcall void native_write_cr3(unsigned long val)
{
	asm volatile("movl %0,%%cr3": :"r" (val));
}

static fastcall unsigned long native_read_cr4(void)
{
	unsigned long val;
	asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
	return val;
}

/* Like native_read_cr4(), but returns 0 instead of faulting on CPUs
 * without CR4 (pre-Pentium): the exception-table entry resumes at 2:
 * leaving the preloaded 0 in val. */
static fastcall unsigned long native_read_cr4_safe(void)
{
	unsigned long val;
	/* This could fault if %cr4 does not exist */
	asm("1: movl %%cr4, %0		\n"
	    "2:				\n"
	    ".section __ex_table,\"a\"	\n"
	    ".long 1b,2b		\n"
	    ".previous			\n"
	    : "=r" (val): "0" (0));
	return val;
}

static fastcall void native_write_cr4(unsigned long val)
{
	asm volatile("movl %0,%%cr4": :"r" (val));
}
/* Return the current EFLAGS value (includes the IF interrupt-enable bit). */
static fastcall unsigned long native_save_fl(void)
{
	unsigned long f;
	asm volatile("pushfl ; popl %0":"=g" (f): /* no input */);
	return f;
}

/* Restore a previously saved EFLAGS value; "memory" clobber keeps the
 * compiler from moving memory accesses across the IRQ-state change. */
static fastcall void native_restore_fl(unsigned long f)
{
	asm volatile("pushl %0 ; popfl": /* no output */
			     :"g" (f)
			     :"memory", "cc");
}

static fastcall void native_irq_disable(void)
{
	asm volatile("cli": : :"memory");
}

static fastcall void native_irq_enable(void)
{
	asm volatile("sti": : :"memory");
}

/* Enable interrupts and halt atomically: sti takes effect after the
 * following instruction, so a pending interrupt cannot slip in between
 * the enable and the hlt. */
static fastcall void native_safe_halt(void)
{
	asm volatile("sti; hlt": : :"memory");
}

static fastcall void native_halt(void)
{
	asm volatile("hlt": : :"memory");
}

/* Write back and invalidate all CPU caches. */
static fastcall void native_wbinvd(void)
{
	asm volatile("wbinvd": : :"memory");
}
/* Read MSR `msr`.  On success *err is zeroed; if rdmsr faults (invalid
 * MSR), the .fixup branch stores -EFAULT in *err and the returned value
 * is whatever edx:eax held.  Callers must check *err. */
static fastcall unsigned long long native_read_msr(unsigned int msr, int *err)
{
	unsigned long long val;

	asm volatile("2: rdmsr ; xorl %0,%0\n"
		     "1:\n\t"
		     ".section .fixup,\"ax\"\n\t"
		     "3:  movl %3,%0 ; jmp 1b\n\t"
		     ".previous\n\t"
 		     ".section __ex_table,\"a\"\n"
		     " .align 4\n\t"
		     " .long 	2b,3b\n\t"
		     ".previous"
		     : "=r" (*err), "=A" (val)
		     : "c" (msr), "i" (-EFAULT));

	return val;
}

/* Write the 64-bit `val` to MSR `msr` (split into edx:eax).  Returns 0,
 * or -EFAULT via the exception-table fixup when wrmsr faults. */
static fastcall int native_write_msr(unsigned int msr, unsigned long long val)
{
	int err;
	asm volatile("2: wrmsr ; xorl %0,%0\n"
		     "1:\n\t"
		     ".section .fixup,\"ax\"\n\t"
		     "3:  movl %4,%0 ; jmp 1b\n\t"
		     ".previous\n\t"
		     ".section __ex_table,\"a\"\n"
		     " .align 4\n\t"
		     " .long 	2b,3b\n\t"
		     ".previous"
		     : "=a" (err)
		     : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)),
		       "i" (-EFAULT));
	return err;
}
/* Read the 64-bit time-stamp counter (rdtsc returns it in edx:eax,
 * matched by the "=A" constraint). */
static fastcall unsigned long long native_read_tsc(void)
{
	unsigned long long val;
	asm volatile("rdtsc" : "=A" (val));
	return val;
}

/* Read a performance-monitoring counter; the counter index is expected
 * in %ecx per the rdpmc instruction — NOTE(review): no input constraint
 * sets %ecx here, callers appear to rely on the paravirt calling
 * convention to have it loaded. */
static fastcall unsigned long long native_read_pmc(void)
{
	unsigned long long val;
	asm volatile("rdpmc" : "=A" (val));
	return val;
}
/* Load the task register with the fixed TSS selector for this CPU's GDT. */
static fastcall void native_load_tr_desc(void)
{
	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
}

/* Descriptor-table load/store wrappers (lgdt/lidt/sgdt/sidt/str). */
static fastcall void native_load_gdt(const struct Xgt_desc_struct *dtr)
{
	asm volatile("lgdt %0"::"m" (*dtr));
}

static fastcall void native_load_idt(const struct Xgt_desc_struct *dtr)
{
	asm volatile("lidt %0"::"m" (*dtr));
}

static fastcall void native_store_gdt(struct Xgt_desc_struct *dtr)
{
	asm ("sgdt %0":"=m" (*dtr));
}

static fastcall void native_store_idt(struct Xgt_desc_struct *dtr)
{
	asm ("sidt %0":"=m" (*dtr));
}

/* Return the current task-register selector. */
static fastcall unsigned long native_store_tr(void)
{
	unsigned long tr;
	asm ("str %0":"=r" (tr));
	return tr;
}
  289. static fastcall void native_load_tls(struct thread_struct *t, unsigned int cpu)
  290. {
  291. #define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
  292. C(0); C(1); C(2);
  293. #undef C
  294. }
  295. static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high)
  296. {
  297. u32 *lp = (u32 *)((char *)dt + entry*8);
  298. lp[0] = entry_low;
  299. lp[1] = entry_high;
  300. }
  301. static fastcall void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high)
  302. {
  303. native_write_dt_entry(dt, entrynum, low, high);
  304. }
  305. static fastcall void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high)
  306. {
  307. native_write_dt_entry(dt, entrynum, low, high);
  308. }
  309. static fastcall void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high)
  310. {
  311. native_write_dt_entry(dt, entrynum, low, high);
  312. }
/* Update the TSS so ring-0 entries (interrupts, sysenter) use the new
 * thread's kernel stack; also refresh the SYSENTER CS MSR when the
 * thread's sysenter segment differs from what the TSS last saw. */
static fastcall void native_load_esp0(struct tss_struct *tss,
				      struct thread_struct *thread)
{
	tss->esp0 = thread->esp0;

	/* This can only happen when SEP is enabled, no need to test "SEP"arately */
	if (unlikely(tss->ss1 != thread->sysenter_cs)) {
		tss->ss1 = thread->sysenter_cs;
		wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
	}
}
/* Short I/O delay: a write to unused port 0x80 takes roughly 1us on
 * ISA-era hardware and has no side effects. */
static fastcall void native_io_delay(void)
{
	asm volatile("outb %al,$0x80");
}
/* Flush all non-global TLB entries. */
static fastcall void native_flush_tlb(void)
{
	__native_flush_tlb();
}

/*
 * Global pages have to be flushed a bit differently. Not a real
 * performance problem because this does not happen often.
 */
static fastcall void native_flush_tlb_global(void)
{
	__native_flush_tlb_global();
}

/* Flush the TLB entry for a single page. */
static fastcall void native_flush_tlb_single(u32 addr)
{
	__native_flush_tlb_single(addr);
}
#ifndef CONFIG_X86_PAE
/* Without PAE a pte/pmd is a single 32-bit word, so a plain store is
 * atomic and no ordering games are needed. */
static fastcall void native_set_pte(pte_t *ptep, pte_t pteval)
{
	*ptep = pteval;
}

static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
{
	*ptep = pteval;
}

static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	*pmdp = pmdval;
}
#else /* CONFIG_X86_PAE */
/* PAE ptes are 64 bits and set with two 32-bit stores.  The high word
 * is written first, with an smp_wmb() before the low word (which holds
 * the present bit), so another CPU never sees a present pte with a
 * stale high half. */
static fastcall void native_set_pte(pte_t *ptep, pte_t pte)
{
	ptep->pte_high = pte.pte_high;
	smp_wmb();
	ptep->pte_low = pte.pte_low;
}

static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
{
	ptep->pte_high = pte.pte_high;
	smp_wmb();
	ptep->pte_low = pte.pte_low;
}

/* Replace a possibly-present pte: clear the low word (not-present)
 * first, then install high and low halves, with barriers between each
 * store so no transiently inconsistent pte is ever visible. */
static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
{
	ptep->pte_low = 0;
	smp_wmb();
	ptep->pte_high = pte.pte_high;
	smp_wmb();
	ptep->pte_low = pte.pte_low;
}

/* Fully atomic 64-bit pte store via cmpxchg8b (set_64bit). */
static fastcall void native_set_pte_atomic(pte_t *ptep, pte_t pteval)
{
	set_64bit((unsigned long long *)ptep,pte_val(pteval));
}

static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	set_64bit((unsigned long long *)pmdp,pmd_val(pmdval));
}

static fastcall void native_set_pud(pud_t *pudp, pud_t pudval)
{
	*pudp = pudval;
}

/* Clear a pte: low (present) word first, then high, mirroring the set
 * ordering above. */
static fastcall void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	ptep->pte_low = 0;
	smp_wmb();
	ptep->pte_high = 0;
}

static fastcall void native_pmd_clear(pmd_t *pmd)
{
	u32 *tmp = (u32 *)pmd;
	*tmp = 0;
	smp_wmb();
	*(tmp + 1) = 0;
}
#endif /* CONFIG_X86_PAE */
/* These are in entry.S */
extern fastcall void native_iret(void);
extern fastcall void native_irq_enable_sysexit(void);

/* Initcall: print the backend banner once the console is up. */
static int __init print_banner(void)
{
	paravirt_ops.banner();
	return 0;
}
core_initcall(print_banner);

/* We simply declare start_kernel to be the paravirt probe of last resort. */
paravirt_probe(start_kernel);
/* The default paravirt_ops: direct native implementations of every hook,
 * used when no hypervisor backend replaces them.  Field order follows
 * struct paravirt_ops; grouping comments below are informational only. */
struct paravirt_ops paravirt_ops = {
	.name = "bare hardware",
	.paravirt_enabled = 0,
	.kernel_rpl = 0,

	.patch = native_patch,
	.banner = default_banner,
	.arch_setup = native_nop,
	.memory_setup = machine_specific_memory_setup,
	.get_wallclock = native_get_wallclock,
	.set_wallclock = native_set_wallclock,
	.time_init = time_init_hook,
	.init_IRQ = native_init_IRQ,

	/* CPU state accessors */
	.cpuid = native_cpuid,
	.get_debugreg = native_get_debugreg,
	.set_debugreg = native_set_debugreg,
	.clts = native_clts,
	.read_cr0 = native_read_cr0,
	.write_cr0 = native_write_cr0,
	.read_cr2 = native_read_cr2,
	.write_cr2 = native_write_cr2,
	.read_cr3 = native_read_cr3,
	.write_cr3 = native_write_cr3,
	.read_cr4 = native_read_cr4,
	.read_cr4_safe = native_read_cr4_safe,
	.write_cr4 = native_write_cr4,

	/* Interrupt state */
	.save_fl = native_save_fl,
	.restore_fl = native_restore_fl,
	.irq_disable = native_irq_disable,
	.irq_enable = native_irq_enable,
	.safe_halt = native_safe_halt,
	.halt = native_halt,
	.wbinvd = native_wbinvd,

	/* MSRs and counters */
	.read_msr = native_read_msr,
	.write_msr = native_write_msr,
	.read_tsc = native_read_tsc,
	.read_pmc = native_read_pmc,

	/* Descriptor tables */
	.load_tr_desc = native_load_tr_desc,
	.set_ldt = native_set_ldt,
	.load_gdt = native_load_gdt,
	.load_idt = native_load_idt,
	.store_gdt = native_store_gdt,
	.store_idt = native_store_idt,
	.store_tr = native_store_tr,
	.load_tls = native_load_tls,
	.write_ldt_entry = native_write_ldt_entry,
	.write_gdt_entry = native_write_gdt_entry,
	.write_idt_entry = native_write_idt_entry,
	.load_esp0 = native_load_esp0,

	.set_iopl_mask = native_set_iopl_mask,
	.io_delay = native_io_delay,
	.const_udelay = __const_udelay,

#ifdef CONFIG_X86_LOCAL_APIC
	.apic_write = native_apic_write,
	.apic_write_atomic = native_apic_write_atomic,
	.apic_read = native_apic_read,
#endif

	/* MMU / TLB */
	.flush_tlb_user = native_flush_tlb,
	.flush_tlb_kernel = native_flush_tlb_global,
	.flush_tlb_single = native_flush_tlb_single,

	.set_pte = native_set_pte,
	.set_pte_at = native_set_pte_at,
	.set_pmd = native_set_pmd,
	/* pte update notifications are no-ops on native hardware */
	.pte_update = (void *)native_nop,
	.pte_update_defer = (void *)native_nop,
#ifdef CONFIG_X86_PAE
	.set_pte_atomic = native_set_pte_atomic,
	.set_pte_present = native_set_pte_present,
	.set_pud = native_set_pud,
	.pte_clear = native_pte_clear,
	.pmd_clear = native_pmd_clear,
#endif

	/* Lowlevel entry/exit stubs live in entry.S */
	.irq_enable_sysexit = native_irq_enable_sysexit,
	.iret = native_iret,
};

/*
 * NOTE: CONFIG_PARAVIRT is experimental and the paravirt_ops
 * semantics are subject to change. Hence we only do this
 * internal-only export of this, until it gets sorted out and
 * all lowlevel CPU ops used by modules are separately exported.
 */
EXPORT_SYMBOL_GPL(paravirt_ops);