kvm_trace.c 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. /*
  2. * kvm trace
  3. *
  4. * It is designed to allow debugging traces of kvm to be generated
  5. * on UP / SMP machines. Each trace entry can be timestamped so that
  6. * it's possible to reconstruct a chronological record of trace events.
  7. * The implementation refers to blktrace kernel support.
  8. *
  9. * Copyright (c) 2008 Intel Corporation
  10. * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
  11. *
  12. * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
  13. *
  14. * Date: Feb 2008
  15. */
  16. #include <linux/module.h>
  17. #include <linux/relay.h>
  18. #include <linux/debugfs.h>
  19. #include <linux/ktime.h>
  20. #include <linux/kvm_host.h>
/* Values for kvm_trace.trace_state (lifecycle of the tracer). */
#define KVM_TRACE_STATE_RUNNING (1 << 0)
#define KVM_TRACE_STATE_PAUSE (1 << 1)
#define KVM_TRACE_STATE_CLEARUP (1 << 2)

/*
 * Per-tracer state: the relay channel records are written into, the
 * debugfs file exposing the lost-record count, and that count itself.
 */
struct kvm_trace {
	int trace_state;		/* KVM_TRACE_STATE_* */
	struct rchan *rchan;		/* relay channel backing the trace */
	struct dentry *lost_file;	/* debugfs "lost_records" file */
	atomic_t lost_records;		/* full-subbuffer events (records dropped) */
};

/* Singleton tracer; NULL until KVM_TRACE_ENABLE succeeds. */
static struct kvm_trace *kvm_trace;

/*
 * One marker hook: the marker name/format it attaches to, whether its
 * records carry a timestamp, and the probe handler to invoke.
 */
struct kvm_trace_probe {
	const char *name;		/* marker name */
	const char *format;		/* expected marker format string */
	u32 timestamp_in;		/* non-zero: records include a timestamp */
	marker_probe_func *probe_func;	/* handler run when the marker fires */
};
  37. static inline int calc_rec_size(int timestamp, int extra)
  38. {
  39. int rec_size = KVM_TRC_HEAD_SIZE;
  40. rec_size += extra;
  41. return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
  42. }
  43. static void kvm_add_trace(void *probe_private, void *call_data,
  44. const char *format, va_list *args)
  45. {
  46. struct kvm_trace_probe *p = probe_private;
  47. struct kvm_trace *kt = kvm_trace;
  48. struct kvm_trace_rec rec;
  49. struct kvm_vcpu *vcpu;
  50. int i, size;
  51. u32 extra;
  52. if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
  53. return;
  54. rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32));
  55. vcpu = va_arg(*args, struct kvm_vcpu *);
  56. rec.pid = current->tgid;
  57. rec.vcpu_id = vcpu->vcpu_id;
  58. extra = va_arg(*args, u32);
  59. WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
  60. extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX);
  61. rec.rec_val |= TRACE_REC_TCS(p->timestamp_in)
  62. | TRACE_REC_NUM_DATA_ARGS(extra);
  63. if (p->timestamp_in) {
  64. rec.u.timestamp.timestamp = ktime_to_ns(ktime_get());
  65. for (i = 0; i < extra; i++)
  66. rec.u.timestamp.extra_u32[i] = va_arg(*args, u32);
  67. } else {
  68. for (i = 0; i < extra; i++)
  69. rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32);
  70. }
  71. size = calc_rec_size(p->timestamp_in, extra * sizeof(u32));
  72. relay_write(kt->rchan, &rec, size);
  73. }
/* Markers we hook: entry/exit records are timestamped, handler records are not. */
static struct kvm_trace_probe kvm_trace_probes[] = {
	{ "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
	{ "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
};
  78. static int lost_records_get(void *data, u64 *val)
  79. {
  80. struct kvm_trace *kt = data;
  81. *val = atomic_read(&kt->lost_records);
  82. return 0;
  83. }
  84. DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
/*
 * The relay channel is used in "no-overwrite" mode; it keeps track of
 * how many times we encountered a full subbuffer, so the user space
 * app can be told how many records were lost.
 */
  90. static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
  91. void *prev_subbuf, size_t prev_padding)
  92. {
  93. struct kvm_trace *kt;
  94. if (!relay_buf_full(buf)) {
  95. if (!prev_subbuf) {
  96. /*
  97. * executed only once when the channel is opened
  98. * save metadata as first record
  99. */
  100. subbuf_start_reserve(buf, sizeof(u32));
  101. *(u32 *)subbuf = 0x12345678;
  102. }
  103. return 1;
  104. }
  105. kt = buf->chan->private_data;
  106. atomic_inc(&kt->lost_records);
  107. return 0;
  108. }
  109. static struct dentry *kvm_create_buf_file_callack(const char *filename,
  110. struct dentry *parent,
  111. int mode,
  112. struct rchan_buf *buf,
  113. int *is_global)
  114. {
  115. return debugfs_create_file(filename, mode, parent, buf,
  116. &relay_file_operations);
  117. }
  118. static int kvm_remove_buf_file_callback(struct dentry *dentry)
  119. {
  120. debugfs_remove(dentry);
  121. return 0;
  122. }
/* Callback table handed to relay_open(); see the three functions above. */
static struct rchan_callbacks kvm_relay_callbacks = {
	.subbuf_start		= kvm_subbuf_start_callback,
	.create_buf_file	= kvm_create_buf_file_callack,
	.remove_buf_file	= kvm_remove_buf_file_callback,
};
/*
 * Allocate the tracer, create the "lost_records" debugfs file and the
 * relay channel, then register the marker probes and mark the tracer
 * RUNNING. Returns 0 on success or a negative errno; on failure all
 * partially created resources are unwound.
 */
static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
{
	struct kvm_trace *kt;
	int i, r = -ENOMEM;

	/* Userspace must request a non-empty relay buffer geometry. */
	if (!kuts->buf_size || !kuts->buf_nr)
		return -EINVAL;

	kt = kzalloc(sizeof(*kt), GFP_KERNEL);
	if (!kt)
		goto err;

	r = -EIO;	/* covers both debugfs and relay_open failures below */
	atomic_set(&kt->lost_records, 0);
	kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir,
					    kt, &kvm_trace_lost_ops);
	if (!kt->lost_file)
		goto err;

	kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size,
			       kuts->buf_nr, &kvm_relay_callbacks, kt);
	if (!kt->rchan)
		goto err;

	/* Publish before registering probes: kvm_add_trace reads kvm_trace. */
	kvm_trace = kt;

	for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
		struct kvm_trace_probe *p = &kvm_trace_probes[i];

		/*
		 * NOTE(review): a probe registration failure is only
		 * logged — the function still returns 0 below even if
		 * every probe failed. Confirm this best-effort behavior
		 * is intended.
		 */
		r = marker_probe_register(p->name, p->format, p->probe_func, p);
		if (r)
			printk(KERN_INFO "Unable to register probe %s\n",
			       p->name);
	}

	kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;

	return 0;
err:
	/* Unwind: kzalloc guarantees unset members are NULL here. */
	if (kt) {
		if (kt->lost_file)
			debugfs_remove(kt->lost_file);
		if (kt->rchan)
			relay_close(kt->rchan);
		kfree(kt);
	}
	return r;
}
  167. static int kvm_trace_enable(char __user *arg)
  168. {
  169. struct kvm_user_trace_setup kuts;
  170. int ret;
  171. ret = copy_from_user(&kuts, arg, sizeof(kuts));
  172. if (ret)
  173. return -EFAULT;
  174. ret = do_kvm_trace_enable(&kuts);
  175. if (ret)
  176. return ret;
  177. return 0;
  178. }
  179. static int kvm_trace_pause(void)
  180. {
  181. struct kvm_trace *kt = kvm_trace;
  182. int r = -EINVAL;
  183. if (kt == NULL)
  184. return r;
  185. if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
  186. kt->trace_state = KVM_TRACE_STATE_PAUSE;
  187. relay_flush(kt->rchan);
  188. r = 0;
  189. }
  190. return r;
  191. }
  192. void kvm_trace_cleanup(void)
  193. {
  194. struct kvm_trace *kt = kvm_trace;
  195. int i;
  196. if (kt == NULL)
  197. return;
  198. if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
  199. kt->trace_state == KVM_TRACE_STATE_PAUSE) {
  200. kt->trace_state = KVM_TRACE_STATE_CLEARUP;
  201. for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
  202. struct kvm_trace_probe *p = &kvm_trace_probes[i];
  203. marker_probe_unregister(p->name, p->probe_func, p);
  204. }
  205. marker_synchronize_unregister();
  206. relay_close(kt->rchan);
  207. debugfs_remove(kt->lost_file);
  208. kfree(kt);
  209. }
  210. }
  211. int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
  212. {
  213. void __user *argp = (void __user *)arg;
  214. long r = -EINVAL;
  215. if (!capable(CAP_SYS_ADMIN))
  216. return -EPERM;
  217. switch (ioctl) {
  218. case KVM_TRACE_ENABLE:
  219. r = kvm_trace_enable(argp);
  220. break;
  221. case KVM_TRACE_PAUSE:
  222. r = kvm_trace_pause();
  223. break;
  224. case KVM_TRACE_DISABLE:
  225. r = 0;
  226. kvm_trace_cleanup();
  227. break;
  228. }
  229. return r;
  230. }