book3s_hv.c

/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Paul Mackerras <paulus@au1.ibm.com>
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *
 * Description: KVM functions specific to running on Book 3S
 * processors in hypervisor mode (specifically POWER7 and later).
 *
 * This file is derived from arch/powerpc/kvm/book3s.c,
 * by Alexander Graf <agraf@suse.de>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/cpumask.h>

#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>

#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>

/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        local_paca->kvm_hstate.kvm_vcpu = vcpu;
        local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
}

void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
}

static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu);
static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu);
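
/*
 * A note on the timing conversion below: the guest decrementer expiry
 * (dec_expires) is kept in timebase ticks, while hrtimers take
 * nanoseconds, so the remaining ticks are scaled by
 * NSEC_PER_SEC / tb_ticks_per_sec.  For example, assuming a 512 MHz
 * timebase, 512,000,000 remaining ticks arm the timer for roughly
 * one second.
 */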
void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
{
        u64 now;
        unsigned long dec_nsec;

        now = get_tb();
        if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu))
                kvmppc_core_queue_dec(vcpu);
        if (vcpu->arch.pending_exceptions)
                return;
        if (vcpu->arch.dec_expires != ~(u64)0) {
                dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC /
                        tb_ticks_per_sec;
                hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
                              HRTIMER_MODE_REL);
        }

        kvmppc_vcpu_blocked(vcpu);

        kvm_vcpu_block(vcpu);
        vcpu->stat.halt_wakeup++;

        if (vcpu->arch.dec_expires != ~(u64)0)
                hrtimer_try_to_cancel(&vcpu->arch.dec_timer);

        kvmppc_vcpu_unblocked(vcpu);
}
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
        vcpu->arch.shregs.msr = msr;
}

void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
{
        vcpu->arch.pvr = pvr;
}

void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
        int r;

        pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
        pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
               vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
        for (r = 0; r < 16; ++r)
                pr_err("r%2d = %.16lx r%d = %.16lx\n",
                       r, kvmppc_get_gpr(vcpu, r),
                       r+16, kvmppc_get_gpr(vcpu, r+16));
        pr_err("ctr = %.16lx lr = %.16lx\n",
               vcpu->arch.ctr, vcpu->arch.lr);
        pr_err("srr0 = %.16llx srr1 = %.16llx\n",
               vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
        pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
               vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
        pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
               vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
        pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
               vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
        pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
        pr_err("fault dar = %.16lx dsisr = %.8x\n",
               vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
        pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
        for (r = 0; r < vcpu->arch.slb_max; ++r)
                pr_err(" ESID = %.16llx VSID = %.16llx\n",
                       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
        pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
               vcpu->arch.lpcr, vcpu->kvm->arch.sdr1,
               vcpu->arch.last_inst);
}

struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
        int r;
        struct kvm_vcpu *v, *ret = NULL;

        mutex_lock(&kvm->lock);
        kvm_for_each_vcpu(r, v, kvm) {
                if (v->vcpu_id == id) {
                        ret = v;
                        break;
                }
        }
        mutex_unlock(&kvm->lock);
        return ret;
}

static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
{
        vpa->shared_proc = 1;
        vpa->yield_count = 1;
}
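
/*
 * Handle H_REGISTER_VPA.  The shift and mask below extract a 3-bit
 * subfunction code from the hypercall "flags" argument (big-endian
 * bits 16-18 of the flags doubleword): 1, 2 and 3 register the VPA,
 * the dispatch trace log and the SLB shadow buffer respectively,
 * while 5, 6 and 7 unregister them again; values 0 and 4 are rejected
 * with H_PARAMETER.
 */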
static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
                                       unsigned long flags,
                                       unsigned long vcpuid, unsigned long vpa)
{
        struct kvm *kvm = vcpu->kvm;
        unsigned long pg_index, ra, len;
        unsigned long pg_offset;
        void *va;
        struct kvm_vcpu *tvcpu;

        tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
        if (!tvcpu)
                return H_PARAMETER;

        flags >>= 63 - 18;
        flags &= 7;
        if (flags == 0 || flags == 4)
                return H_PARAMETER;
        if (flags < 4) {
                if (vpa & 0x7f)
                        return H_PARAMETER;
                /* registering new area; convert logical addr to real */
                pg_index = vpa >> kvm->arch.ram_porder;
                pg_offset = vpa & (kvm->arch.ram_psize - 1);
                if (pg_index >= kvm->arch.ram_npages)
                        return H_PARAMETER;
                if (kvm->arch.ram_pginfo[pg_index].pfn == 0)
                        return H_PARAMETER;
                ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT;
                ra |= pg_offset;
                va = __va(ra);
                if (flags <= 1)
                        len = *(unsigned short *)(va + 4);
                else
                        len = *(unsigned int *)(va + 4);
                if (pg_offset + len > kvm->arch.ram_psize)
                        return H_PARAMETER;
                switch (flags) {
                case 1:         /* register VPA */
                        if (len < 640)
                                return H_PARAMETER;
                        tvcpu->arch.vpa = va;
                        init_vpa(vcpu, va);
                        break;
                case 2:         /* register DTL */
                        if (len < 48)
                                return H_PARAMETER;
                        if (!tvcpu->arch.vpa)
                                return H_RESOURCE;
                        len -= len % 48;
                        tvcpu->arch.dtl = va;
                        tvcpu->arch.dtl_end = va + len;
                        break;
                case 3:         /* register SLB shadow buffer */
                        if (len < 8)
                                return H_PARAMETER;
                        if (!tvcpu->arch.vpa)
                                return H_RESOURCE;
                        len = (len - 16) / 16;
                        tvcpu->arch.slb_shadow = va;
                        break;
                }
        } else {
                switch (flags) {
                case 5:         /* unregister VPA */
                        if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
                                return H_RESOURCE;
                        tvcpu->arch.vpa = NULL;
                        break;
                case 6:         /* unregister DTL */
                        tvcpu->arch.dtl = NULL;
                        break;
                case 7:         /* unregister SLB shadow buffer */
                        tvcpu->arch.slb_shadow = NULL;
                        break;
                }
        }
        return H_SUCCESS;
}
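
/*
 * Hypercalls arrive here with the request number in GPR3 and the
 * arguments in GPR4 upwards; the return status is passed back in GPR3
 * (see the kvmppc_set_gpr(vcpu, 3, ret) at the end).  Roughly, a guest
 * invocation of H_PROD looks like:
 *
 *      li      r3, H_PROD      # hcall number
 *      li      r4, <target>    # vcpu id to prod
 *      sc      1               # hypervisor call
 *
 * Anything not handled here is returned as RESUME_HOST so that it is
 * passed up to userspace as a KVM_EXIT_PAPR_HCALL exit.
 */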
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
        unsigned long req = kvmppc_get_gpr(vcpu, 3);
        unsigned long target, ret = H_SUCCESS;
        struct kvm_vcpu *tvcpu;

        switch (req) {
        case H_CEDE:
                vcpu->arch.shregs.msr |= MSR_EE;
                vcpu->arch.ceded = 1;
                smp_mb();
                if (!vcpu->arch.prodded)
                        kvmppc_vcpu_block(vcpu);
                else
                        vcpu->arch.prodded = 0;
                smp_mb();
                vcpu->arch.ceded = 0;
                break;
        case H_PROD:
                target = kvmppc_get_gpr(vcpu, 4);
                tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
                if (!tvcpu) {
                        ret = H_PARAMETER;
                        break;
                }
                tvcpu->arch.prodded = 1;
                smp_mb();
                /* wake the target vcpu if it has ceded */
                if (tvcpu->arch.ceded) {
                        if (waitqueue_active(&tvcpu->wq)) {
                                wake_up_interruptible(&tvcpu->wq);
                                tvcpu->stat.halt_wakeup++;
                        }
                }
                break;
        case H_CONFER:
                break;
        case H_REGISTER_VPA:
                ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
                                        kvmppc_get_gpr(vcpu, 5),
                                        kvmppc_get_gpr(vcpu, 6));
                break;
        default:
                return RESUME_HOST;
        }
        kvmppc_set_gpr(vcpu, 3, ret);
        vcpu->arch.hcall_needed = 0;
        return RESUME_GUEST;
}
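
/*
 * Handle an exit from the guest.  Returns RESUME_GUEST if the exit was
 * handled entirely here (possibly after queueing an interrupt for the
 * guest), RESUME_HOST if userspace needs to see it, or a negative errno
 * such as -EINTR if a signal is pending for the run task.
 */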
static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                              struct task_struct *tsk)
{
        int r = RESUME_HOST;

        vcpu->stat.sum_exits++;

        run->exit_reason = KVM_EXIT_UNKNOWN;
        run->ready_for_interrupt_injection = 1;
        switch (vcpu->arch.trap) {
        /* We're good on these - the host merely wanted to get our attention */
        case BOOK3S_INTERRUPT_HV_DECREMENTER:
                vcpu->stat.dec_exits++;
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_EXTERNAL:
                vcpu->stat.ext_intr_exits++;
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_PERFMON:
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_PROGRAM:
        {
                ulong flags;
                /*
                 * Normally program interrupts are delivered directly
                 * to the guest by the hardware, but we can get here
                 * as a result of a hypervisor emulation interrupt
                 * (e40) getting turned into a 700 by BML RTAS.
                 */
                flags = vcpu->arch.shregs.msr & 0x1f0000ull;
                kvmppc_core_queue_program(vcpu, flags);
                r = RESUME_GUEST;
                break;
        }
        case BOOK3S_INTERRUPT_SYSCALL:
        {
                /* hcall - punt to userspace */
                int i;

                if (vcpu->arch.shregs.msr & MSR_PR) {
                        /* sc 1 from userspace - reflect to guest syscall */
                        kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
                        r = RESUME_GUEST;
                        break;
                }
                run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
                for (i = 0; i < 9; ++i)
                        run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
                run->exit_reason = KVM_EXIT_PAPR_HCALL;
                vcpu->arch.hcall_needed = 1;
                r = RESUME_HOST;
                break;
        }
        /*
         * We get these next two if the guest does a bad real-mode access,
         * as we have enabled VRMA (virtualized real mode area) mode in the
         * LPCR.  We just generate an appropriate DSI/ISI to the guest.
         */
        case BOOK3S_INTERRUPT_H_DATA_STORAGE:
                vcpu->arch.shregs.dsisr = vcpu->arch.fault_dsisr;
                vcpu->arch.shregs.dar = vcpu->arch.fault_dar;
                kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0);
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_H_INST_STORAGE:
                kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE,
                                        0x08000000);
                r = RESUME_GUEST;
                break;
        /*
         * This occurs if the guest executes an illegal instruction.
         * We just generate a program interrupt to the guest, since
         * we don't emulate any guest instructions at this stage.
         */
        case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
                kvmppc_core_queue_program(vcpu, 0x80000);
                r = RESUME_GUEST;
                break;
        default:
                kvmppc_dump_regs(vcpu);
                printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
                        vcpu->arch.trap, kvmppc_get_pc(vcpu),
                        vcpu->arch.shregs.msr);
                r = RESUME_HOST;
                BUG();
                break;
        }

        if (!(r & RESUME_HOST)) {
                /* To avoid clobbering exit_reason, only check for signals if
                 * we aren't already exiting to userspace for some other
                 * reason. */
                if (signal_pending(tsk)) {
                        vcpu->stat.signal_exits++;
                        run->exit_reason = KVM_EXIT_INTR;
                        r = -EINTR;
                } else {
                        kvmppc_core_deliver_interrupts(vcpu);
                }
        }

        return r;
}
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        int i;

        memset(sregs, 0, sizeof(struct kvm_sregs));
        sregs->pvr = vcpu->arch.pvr;
        for (i = 0; i < vcpu->arch.slb_max; i++) {
                sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
                sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
        }

        return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        int i, j;

        kvmppc_set_pvr(vcpu, sregs->pvr);

        j = 0;
        for (i = 0; i < vcpu->arch.slb_nr; i++) {
                if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
                        vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
                        vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
                        ++j;
                }
        }
        vcpu->arch.slb_max = j;

        return 0;
}

int kvmppc_core_check_processor_compat(void)
{
        if (cpu_has_feature(CPU_FTR_HVMODE_206))
                return 0;
        return -EIO;
}
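
/*
 * Virtual CPUs are grouped into virtual cores by dividing the vcpu id
 * by threads_per_core, so that vcpus which would share a physical core
 * share one struct kvmppc_vcore.  For example, assuming an SMT4 host
 * (threads_per_core == 4), vcpu ids 0-3 land in vcore 0 and ids 4-7 in
 * vcore 1.
 */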
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
        struct kvm_vcpu *vcpu;
        int err = -EINVAL;
        int core;
        struct kvmppc_vcore *vcore;
        unsigned long lpcr;

        core = id / threads_per_core;
        if (core >= KVM_MAX_VCORES)
                goto out;

        err = -ENOMEM;
        vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
        if (!vcpu)
                goto out;

        err = kvm_vcpu_init(vcpu, kvm, id);
        if (err)
                goto free_vcpu;

        vcpu->arch.shared = &vcpu->arch.shregs;
        vcpu->arch.last_cpu = -1;
        vcpu->arch.mmcr[0] = MMCR0_FC;
        vcpu->arch.ctrl = CTRL_RUNLATCH;
        /* default to host PVR, since we can't spoof it */
        vcpu->arch.pvr = mfspr(SPRN_PVR);
        kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
        lpcr = kvm->arch.host_lpcr & (LPCR_PECE | LPCR_LPES);
        lpcr |= LPCR_VPM0 | LPCR_VRMA_L | (4UL << LPCR_DPFD_SH) | LPCR_HDICE;
        vcpu->arch.lpcr = lpcr;

        kvmppc_mmu_book3s_hv_init(vcpu);

        /*
         * Some vcpus may start out in stopped state.  If we initialize
         * them to busy-in-host state they will stop other vcpus in the
         * vcore from running.  Instead we initialize them to blocked
         * state, effectively considering them to be stopped until we
         * see the first run ioctl for them.
         */
        vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
        init_waitqueue_head(&vcpu->arch.cpu_run);

        mutex_lock(&kvm->lock);
        vcore = kvm->arch.vcores[core];
        if (!vcore) {
                vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
                if (vcore) {
                        INIT_LIST_HEAD(&vcore->runnable_threads);
                        spin_lock_init(&vcore->lock);
                }
                kvm->arch.vcores[core] = vcore;
        }
        mutex_unlock(&kvm->lock);

        if (!vcore)
                goto free_vcpu;

        spin_lock(&vcore->lock);
        ++vcore->num_threads;
        ++vcore->n_blocked;
        spin_unlock(&vcore->lock);
        vcpu->arch.vcore = vcore;

        return vcpu;

free_vcpu:
        kfree(vcpu);
out:
        return ERR_PTR(err);
}
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
        kvm_vcpu_uninit(vcpu);
        kfree(vcpu);
}

static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu)
{
        struct kvmppc_vcore *vc = vcpu->arch.vcore;

        spin_lock(&vc->lock);
        vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
        ++vc->n_blocked;
        if (vc->n_runnable > 0 &&
            vc->n_runnable + vc->n_blocked == vc->num_threads) {
                vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
                                        arch.run_list);
                wake_up(&vcpu->arch.cpu_run);
        }
        spin_unlock(&vc->lock);
}

static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu)
{
        struct kvmppc_vcore *vc = vcpu->arch.vcore;

        spin_lock(&vc->lock);
        vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
        --vc->n_blocked;
        spin_unlock(&vc->lock);
}

extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern void xics_wake_cpu(int cpu);

static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
                                   struct kvm_vcpu *vcpu)
{
        struct kvm_vcpu *v;

        if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
                return;
        vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
        --vc->n_runnable;
        /* decrement the physical thread id of each following vcpu */
        v = vcpu;
        list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
                --v->arch.ptid;
        list_del(&vcpu->arch.run_list);
}
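
/*
 * Point the target hardware thread's PACA at this vcpu and, for
 * secondary threads (ptid != 0), wake that thread with an IPI via
 * xics_wake_cpu() so it enters the guest alongside thread 0.
 */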
static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
{
        int cpu;
        struct paca_struct *tpaca;
        struct kvmppc_vcore *vc = vcpu->arch.vcore;

        cpu = vc->pcpu + vcpu->arch.ptid;
        tpaca = &paca[cpu];
        tpaca->kvm_hstate.kvm_vcpu = vcpu;
        tpaca->kvm_hstate.kvm_vcore = vc;
        smp_wmb();
#ifdef CONFIG_PPC_ICP_NATIVE
        if (vcpu->arch.ptid) {
                tpaca->cpu_start = 0x80;
                tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST;
                wmb();
                xics_wake_cpu(cpu);
                ++vc->n_woken;
        }
#endif
}

static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
{
        int i;

        HMT_low();
        i = 0;
        while (vc->nap_count < vc->n_woken) {
                if (++i >= 1000000) {
                        pr_err("kvmppc_wait_for_nap timeout %d %d\n",
                               vc->nap_count, vc->n_woken);
                        break;
                }
                cpu_relax();
        }
        HMT_medium();
}

/*
 * Check that we are on thread 0 and that any other threads in
 * this core are off-line.
 */
static int on_primary_thread(void)
{
        int cpu = smp_processor_id();
        int thr = cpu_thread_in_core(cpu);

        if (thr)
                return 0;
        while (++thr < threads_per_core)
                if (cpu_online(cpu + thr))
                        return 0;
        return 1;
}
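
/*
 * vc->vcore_running tracks what the whole virtual core is doing:
 *   0 - no task is running this vcore in the guest;
 *   1 - a task is in (or entering) the guest on this vcore, so late
 *       arrivals may still join via kvmppc_start_thread();
 *   2 - the vcore is exiting the guest; no new threads may be started
 *       until it drops back to 0.
 */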
/*
 * Run a set of guest threads on a physical core.
 * Called with vc->lock held.
 */
static int kvmppc_run_core(struct kvmppc_vcore *vc)
{
        struct kvm_vcpu *vcpu, *vnext;
        long ret;
        u64 now;

        /* don't start if any threads have a signal pending */
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
                if (signal_pending(vcpu->arch.run_task))
                        return 0;

        /*
         * Make sure we are running on thread 0, and that
         * secondary threads are offline.
         * XXX we should also block attempts to bring any
         * secondary threads online.
         */
        if (threads_per_core > 1 && !on_primary_thread()) {
                list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
                        vcpu->arch.ret = -EBUSY;
                goto out;
        }

        vc->n_woken = 0;
        vc->nap_count = 0;
        vc->entry_exit_count = 0;
        vc->vcore_running = 1;
        vc->in_guest = 0;
        vc->pcpu = smp_processor_id();
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
                kvmppc_start_thread(vcpu);
        vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
                                arch.run_list);

        spin_unlock(&vc->lock);

        preempt_disable();
        kvm_guest_enter();
        __kvmppc_vcore_entry(NULL, vcpu);

        /* wait for secondary threads to finish writing their state to memory */
        spin_lock(&vc->lock);
        if (vc->nap_count < vc->n_woken)
                kvmppc_wait_for_nap(vc);
        /* prevent other vcpu threads from doing kvmppc_start_thread() now */
        vc->vcore_running = 2;
        spin_unlock(&vc->lock);

        /* make sure updates to secondary vcpu structs are visible now */
        smp_mb();
        kvm_guest_exit();

        preempt_enable();
        kvm_resched(vcpu);

        now = get_tb();
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
                /* cancel pending dec exception if dec is positive */
                if (now < vcpu->arch.dec_expires &&
                    kvmppc_core_pending_dec(vcpu))
                        kvmppc_core_dequeue_dec(vcpu);
                if (!vcpu->arch.trap) {
                        if (signal_pending(vcpu->arch.run_task)) {
                                vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
                                vcpu->arch.ret = -EINTR;
                        }
                        continue;               /* didn't get to run */
                }
                ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
                                         vcpu->arch.run_task);
                vcpu->arch.ret = ret;
                vcpu->arch.trap = 0;
        }

        spin_lock(&vc->lock);
 out:
        vc->vcore_running = 0;
        list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
                                 arch.run_list) {
                if (vcpu->arch.ret != RESUME_GUEST) {
                        kvmppc_remove_runnable(vc, vcpu);
                        wake_up(&vcpu->arch.cpu_run);
                }
        }

        return 1;
}
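
/*
 * Each vcpu task adds itself to its vcore's runnable list here and then
 * sleeps on arch.cpu_run until the core has been run.  The physical
 * thread id (ptid) is simply the order of arrival, and the core is
 * entered once every thread of the vcore is either runnable or blocked
 * (n_runnable + n_blocked == num_threads).
 */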
static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
        int ptid;
        int wait_state;
        struct kvmppc_vcore *vc;
        DEFINE_WAIT(wait);

        /* No need to go into the guest when all we do is going out */
        if (signal_pending(current)) {
                kvm_run->exit_reason = KVM_EXIT_INTR;
                return -EINTR;
        }

        kvm_run->exit_reason = 0;
        vcpu->arch.ret = RESUME_GUEST;
        vcpu->arch.trap = 0;

        flush_fp_to_thread(current);
        flush_altivec_to_thread(current);
        flush_vsx_to_thread(current);

        /*
         * Synchronize with other threads in this virtual core
         */
        vc = vcpu->arch.vcore;
        spin_lock(&vc->lock);
        /* This happens the first time this is called for a vcpu */
        if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED)
                --vc->n_blocked;
        vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
        ptid = vc->n_runnable;
        vcpu->arch.run_task = current;
        vcpu->arch.kvm_run = kvm_run;
        vcpu->arch.ptid = ptid;
        list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
        ++vc->n_runnable;

        wait_state = TASK_INTERRUPTIBLE;
        while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
                if (signal_pending(current)) {
                        if (!vc->vcore_running) {
                                kvm_run->exit_reason = KVM_EXIT_INTR;
                                vcpu->arch.ret = -EINTR;
                                break;
                        }
                        /* have to wait for vcore to stop executing guest */
                        wait_state = TASK_UNINTERRUPTIBLE;
                        smp_send_reschedule(vc->pcpu);
                }

                if (!vc->vcore_running &&
                    vc->n_runnable + vc->n_blocked == vc->num_threads) {
                        /* we can run now */
                        if (kvmppc_run_core(vc))
                                continue;
                }

                if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0)
                        kvmppc_start_thread(vcpu);

                /* wait for other threads to come in, or wait for vcore */
                prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
                spin_unlock(&vc->lock);
                schedule();
                finish_wait(&vcpu->arch.cpu_run, &wait);
                spin_lock(&vc->lock);
        }

        if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
                kvmppc_remove_runnable(vc, vcpu);
        spin_unlock(&vc->lock);

        return vcpu->arch.ret;
}
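
/*
 * Top-level run loop.  Hypercalls made by the guest kernel (MSR_PR
 * clear) that come back as KVM_EXIT_PAPR_HCALL are first offered to
 * kvmppc_pseries_do_hcall(); only the ones it declines (RESUME_HOST)
 * actually reach userspace.
 */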
int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
        int r;

        do {
                r = kvmppc_run_vcpu(run, vcpu);

                if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
                    !(vcpu->arch.shregs.msr & MSR_PR)) {
                        r = kvmppc_pseries_do_hcall(vcpu);
                        kvmppc_core_deliver_interrupts(vcpu);
                }
        } while (r == RESUME_GUEST);

        return r;
}
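
/*
 * Size of the TCE table backing a DMA window: one 64-bit TCE per
 * SPAPR_TCE_SHIFT-sized (4 kB) chunk of the window, rounded up to
 * whole pages.  For example, assuming 4 kB host pages, a 256 MB
 * window needs 65536 TCEs = 512 kB = 128 pages.
 */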
static long kvmppc_stt_npages(unsigned long window_size)
{
        return ALIGN((window_size >> SPAPR_TCE_SHIFT)
                     * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
}

static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
{
        struct kvm *kvm = stt->kvm;
        int i;

        mutex_lock(&kvm->lock);
        list_del(&stt->list);
        for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
                __free_page(stt->pages[i]);
        kfree(stt);
        mutex_unlock(&kvm->lock);

        kvm_put_kvm(kvm);
}

static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
        struct page *page;

        if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
                return VM_FAULT_SIGBUS;

        page = stt->pages[vmf->pgoff];
        get_page(page);
        vmf->page = page;
        return 0;
}

static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
        .fault = kvm_spapr_tce_fault,
};

static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
{
        vma->vm_ops = &kvm_spapr_tce_vm_ops;
        return 0;
}

static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
        struct kvmppc_spapr_tce_table *stt = filp->private_data;

        release_spapr_tce_table(stt);
        return 0;
}

static struct file_operations kvm_spapr_tce_fops = {
        .mmap = kvm_spapr_tce_mmap,
        .release = kvm_spapr_tce_release,
};
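
/*
 * Create a TCE table for a guest IOMMU window and hand it to userspace
 * as an anonymous fd, whose pages can then be mmap()ed (see
 * kvm_spapr_tce_mmap above).  The fd holds a reference on the VM via
 * kvm_get_kvm(), which is dropped again when the fd is released
 * through release_spapr_tce_table().
 */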
long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
                                   struct kvm_create_spapr_tce *args)
{
        struct kvmppc_spapr_tce_table *stt = NULL;
        long npages;
        int ret = -ENOMEM;
        int i;

        /* Check this LIOBN hasn't been previously allocated */
        list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
                if (stt->liobn == args->liobn)
                        return -EBUSY;
        }

        npages = kvmppc_stt_npages(args->window_size);

        stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
                      GFP_KERNEL);
        if (!stt)
                goto fail;

        stt->liobn = args->liobn;
        stt->window_size = args->window_size;
        stt->kvm = kvm;

        for (i = 0; i < npages; i++) {
                stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
                if (!stt->pages[i])
                        goto fail;
        }

        kvm_get_kvm(kvm);

        mutex_lock(&kvm->lock);
        list_add(&stt->list, &kvm->arch.spapr_tce_tables);
        mutex_unlock(&kvm->lock);

        return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
                                stt, O_RDWR);

fail:
        if (stt) {
                for (i = 0; i < npages; i++)
                        if (stt->pages[i])
                                __free_page(stt->pages[i]);

                kfree(stt);
        }
        return ret;
}
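
/*
 * A memslot at guest physical address 0 backs the guest's real-mode
 * area, which we run with VRMA enabled in the LPCR (see the comment in
 * kvmppc_handle_exit above); kvmppc_prepare_vrma() and kvmppc_map_vrma()
 * set it up in the hashed page table.
 */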
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
                                      struct kvm_userspace_memory_region *mem)
{
        if (mem->guest_phys_addr == 0 && mem->memory_size != 0)
                return kvmppc_prepare_vrma(kvm, mem);
        return 0;
}

void kvmppc_core_commit_memory_region(struct kvm *kvm,
                                      struct kvm_userspace_memory_region *mem)
{
        if (mem->guest_phys_addr == 0 && mem->memory_size != 0)
                kvmppc_map_vrma(kvm, mem);
}

int kvmppc_core_init_vm(struct kvm *kvm)
{
        long r;

        /* Allocate hashed page table */
        r = kvmppc_alloc_hpt(kvm);
        if (r)
                return r;

        INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
        return 0;
}

void kvmppc_core_destroy_vm(struct kvm *kvm)
{
        kvmppc_free_hpt(kvm);
        WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
}
/* These are stubs for now */
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
{
}

/* We don't need to emulate any privileged instructions or dcbz */
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                           unsigned int inst, int *advance)
{
        return EMULATE_FAIL;
}

int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
{
        return EMULATE_FAIL;
}

int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
{
        return EMULATE_FAIL;
}

static int kvmppc_book3s_hv_init(void)
{
        int r;

        r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);

        if (r)
                return r;

        r = kvmppc_mmu_hv_init();

        return r;
}

static void kvmppc_book3s_hv_exit(void)
{
        kvm_exit();
}

module_init(kvmppc_book3s_hv_init);
module_exit(kvmppc_book3s_hv_exit);