ds.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032
  1. /*
  2. * Debug Store support
  3. *
  4. * This provides a low-level interface to the hardware's Debug Store
  5. * feature that is used for branch trace store (BTS) and
  6. * precise-event based sampling (PEBS).
  7. *
  8. * It manages:
  9. * - DS and BTS hardware configuration
  10. * - buffer overflow handling (to be done)
  11. * - buffer access
  12. *
  13. * It does not do:
  14. * - security checking (is the caller allowed to trace the task)
  15. * - buffer allocation (memory accounting)
  16. *
  17. *
  18. * Copyright (C) 2007-2009 Intel Corporation.
  19. * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  20. */
  21. #include <asm/ds.h>
  22. #include <linux/errno.h>
  23. #include <linux/string.h>
  24. #include <linux/slab.h>
  25. #include <linux/sched.h>
  26. #include <linux/mm.h>
  27. #include <linux/kernel.h>
  28. /*
  29. * The configuration for a particular DS hardware implementation.
  30. */
  31. struct ds_configuration {
  32. /* the name of the configuration */
  33. const char *name;
  34. /* the size of one pointer-typed field in the DS structure and
  35. in the BTS and PEBS buffers in bytes;
  36. this covers the first 8 DS fields related to buffer management. */
  37. unsigned char sizeof_field;
  38. /* the size of a BTS/PEBS record in bytes */
  39. unsigned char sizeof_rec[2];
  40. /* a series of bit-masks to control various features indexed
  41. * by enum ds_feature */
  42. unsigned long ctl[dsf_ctl_max];
  43. };
  44. static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
  45. #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
  46. #define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */
  47. #define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */
  48. #define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
  49. #define BTS_CONTROL \
  50. (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
  51. ds_cfg.ctl[dsf_bts_overflow])
  52. /*
  53. * A BTS or PEBS tracer.
  54. *
  55. * This holds the configuration of the tracer and serves as a handle
  56. * to identify tracers.
  57. */
  58. struct ds_tracer {
  59. /* the DS context (partially) owned by this tracer */
  60. struct ds_context *context;
  61. /* the buffer provided on ds_request() and its size in bytes */
  62. void *buffer;
  63. size_t size;
  64. };
  65. struct bts_tracer {
  66. /* the common DS part */
  67. struct ds_tracer ds;
  68. /* the trace including the DS configuration */
  69. struct bts_trace trace;
  70. /* buffer overflow notification function */
  71. bts_ovfl_callback_t ovfl;
  72. };
  73. struct pebs_tracer {
  74. /* the common DS part */
  75. struct ds_tracer ds;
  76. /* the trace including the DS configuration */
  77. struct pebs_trace trace;
  78. /* buffer overflow notification function */
  79. pebs_ovfl_callback_t ovfl;
  80. };
  81. /*
  82. * Debug Store (DS) save area configuration (see Intel64 and IA32
  83. * Architectures Software Developer's Manual, section 18.5)
  84. *
  85. * The DS configuration consists of the following fields; different
  86. * architectures vary in the size of those fields.
  87. * - double-word aligned base linear address of the BTS buffer
  88. * - write pointer into the BTS buffer
  89. * - end linear address of the BTS buffer (one byte beyond the end of
  90. * the buffer)
  91. * - interrupt pointer into BTS buffer
  92. * (interrupt occurs when write pointer passes interrupt pointer)
  93. * - double-word aligned base linear address of the PEBS buffer
  94. * - write pointer into the PEBS buffer
  95. * - end linear address of the PEBS buffer (one byte beyond the end of
  96. * the buffer)
  97. * - interrupt pointer into PEBS buffer
  98. * (interrupt occurs when write pointer passes interrupt pointer)
  99. * - value to which counter is reset following counter overflow
  100. *
  101. * Later architectures use 64bit pointers throughout, whereas earlier
  102. * architectures use 32bit pointers in 32bit mode.
  103. *
  104. *
  105. * We compute the base address for the first 8 fields based on:
  106. * - the field size stored in the DS configuration
  107. * - the relative field position
  108. * - an offset giving the start of the respective region
  109. *
  110. * This offset is further used to index various arrays holding
  111. * information for BTS and PEBS at the respective index.
  112. *
  113. * On later 32bit processors, we only access the lower 32bit of the
  114. * 64bit pointer fields. The upper halves will be zeroed out.
  115. */
  /* Position of a buffer-management field within its BTS or PEBS group of four. */
  enum ds_field {
      ds_buffer_base          = 0,
      ds_index                = 1,
      ds_absolute_maximum     = 2,
      ds_interrupt_threshold  = 3,
  };
  /* Selects the BTS or the PEBS group of fields; also used as an array index. */
  enum ds_qualifier {
      ds_bts  = 0,
      ds_pebs = 1
  };
  126. static inline unsigned long ds_get(const unsigned char *base,
  127. enum ds_qualifier qual, enum ds_field field)
  128. {
  129. base += (ds_cfg.sizeof_field * (field + (4 * qual)));
  130. return *(unsigned long *)base;
  131. }
  132. static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
  133. enum ds_field field, unsigned long value)
  134. {
  135. base += (ds_cfg.sizeof_field * (field + (4 * qual)));
  136. (*(unsigned long *)base) = value;
  137. }
  /*
   * Locking is limited to the allocation and release of BTS or PEBS
   * resources: context reference counting and the assignment of the
   * BTS/PEBS master of a context.
   */
  static DEFINE_SPINLOCK(ds_lock);
  142. /*
  143. * We either support (system-wide) per-cpu or per-thread allocation.
  144. * We distinguish the two based on the task_struct pointer, where a
  145. * NULL pointer indicates per-cpu allocation for the current cpu.
  146. *
  147. * Allocations are use-counted. As soon as resources are allocated,
  148. * further allocations must be of the same type (per-cpu or
  149. * per-thread). We model this by counting allocations (i.e. the number
  150. * of tracers of a certain type) for one type negatively:
  151. * =0 no tracers
  152. * >0 number of per-thread tracers
  153. * <0 number of per-cpu tracers
  154. *
  155. * The tracers count essentially gives the number of DS contexts for a certain
  156. * type of allocation.
  157. */
  158. static atomic_t tracers = ATOMIC_INIT(0);
  159. static inline void get_tracer(struct task_struct *task)
  160. {
  161. if (task)
  162. atomic_inc(&tracers);
  163. else
  164. atomic_dec(&tracers);
  165. }
  166. static inline void put_tracer(struct task_struct *task)
  167. {
  168. if (task)
  169. atomic_dec(&tracers);
  170. else
  171. atomic_inc(&tracers);
  172. }
  173. static inline int check_tracer(struct task_struct *task)
  174. {
  175. return task ?
  176. (atomic_read(&tracers) >= 0) :
  177. (atomic_read(&tracers) <= 0);
  178. }
  179. /*
  180. * The DS context is either attached to a thread or to a cpu:
  181. * - in the former case, the thread_struct contains a pointer to the
  182. * attached context.
  183. * - in the latter case, we use a static array of per-cpu context
  184. * pointers.
  185. *
  186. * Contexts are use-counted. They are allocated on first access and
  187. * deallocated when the last user puts the context.
  188. */
  189. struct ds_context {
  190. /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
  191. unsigned char ds[MAX_SIZEOF_DS];
  192. /* the owner of the BTS and PEBS configuration, respectively */
  193. struct bts_tracer *bts_master;
  194. struct pebs_tracer *pebs_master;
  195. /* use count */
  196. unsigned long count;
  197. /* a pointer to the context location inside the thread_struct
  198. * or the per_cpu context array */
  199. struct ds_context **this;
  200. /* a pointer to the task owning this context, or NULL, if the
  201. * context is owned by a cpu */
  202. struct task_struct *task;
  203. };
  204. static DEFINE_PER_CPU(struct ds_context *, system_context_array);
  205. #define system_context per_cpu(system_context_array, smp_processor_id())
  206. static inline struct ds_context *ds_get_context(struct task_struct *task)
  207. {
  208. struct ds_context **p_context =
  209. (task ? &task->thread.ds_ctx : &system_context);
  210. struct ds_context *context = NULL;
  211. struct ds_context *new_context = NULL;
  212. unsigned long irq;
  213. /* Chances are small that we already have a context. */
  214. new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
  215. if (!new_context)
  216. return NULL;
  217. spin_lock_irqsave(&ds_lock, irq);
  218. context = *p_context;
  219. if (!context) {
  220. context = new_context;
  221. context->this = p_context;
  222. context->task = task;
  223. context->count = 0;
  224. if (task)
  225. set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
  226. if (!task || (task == current))
  227. wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
  228. *p_context = context;
  229. }
  230. context->count++;
  231. spin_unlock_irqrestore(&ds_lock, irq);
  232. if (context != new_context)
  233. kfree(new_context);
  234. return context;
  235. }
  236. static inline void ds_put_context(struct ds_context *context)
  237. {
  238. unsigned long irq;
  239. if (!context)
  240. return;
  241. spin_lock_irqsave(&ds_lock, irq);
  242. if (--context->count) {
  243. spin_unlock_irqrestore(&ds_lock, irq);
  244. return;
  245. }
  246. *(context->this) = NULL;
  247. if (context->task)
  248. clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
  249. if (!context->task || (context->task == current))
  250. wrmsrl(MSR_IA32_DS_AREA, 0);
  251. spin_unlock_irqrestore(&ds_lock, irq);
  252. kfree(context);
  253. }
  254. /*
  255. * Call the tracer's callback on a buffer overflow.
  256. *
  257. * context: the ds context
  258. * qual: the buffer type
  259. */
  260. static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
  261. {
  262. switch (qual) {
  263. case ds_bts:
  264. if (context->bts_master &&
  265. context->bts_master->ovfl)
  266. context->bts_master->ovfl(context->bts_master);
  267. break;
  268. case ds_pebs:
  269. if (context->pebs_master &&
  270. context->pebs_master->ovfl)
  271. context->pebs_master->ovfl(context->pebs_master);
  272. break;
  273. }
  274. }
  275. /*
  276. * Write raw data into the BTS or PEBS buffer.
  277. *
  278. * The remainder of any partially written record is zeroed out.
  279. *
  280. * context: the DS context
  281. * qual: the buffer type
  282. * record: the data to write
  283. * size: the size of the data
  284. */
  285. static int ds_write(struct ds_context *context, enum ds_qualifier qual,
  286. const void *record, size_t size)
  287. {
  288. int bytes_written = 0;
  289. if (!record)
  290. return -EINVAL;
  291. while (size) {
  292. unsigned long base, index, end, write_end, int_th;
  293. unsigned long write_size, adj_write_size;
  294. /*
  295. * write as much as possible without producing an
  296. * overflow interrupt.
  297. *
  298. * interrupt_threshold must either be
  299. * - bigger than absolute_maximum or
  300. * - point to a record between buffer_base and absolute_maximum
  301. *
  302. * index points to a valid record.
  303. */
  304. base = ds_get(context->ds, qual, ds_buffer_base);
  305. index = ds_get(context->ds, qual, ds_index);
  306. end = ds_get(context->ds, qual, ds_absolute_maximum);
  307. int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
  308. write_end = min(end, int_th);
  309. /* if we are already beyond the interrupt threshold,
  310. * we fill the entire buffer */
  311. if (write_end <= index)
  312. write_end = end;
  313. if (write_end <= index)
  314. break;
  315. write_size = min((unsigned long) size, write_end - index);
  316. memcpy((void *)index, record, write_size);
  317. record = (const char *)record + write_size;
  318. size -= write_size;
  319. bytes_written += write_size;
  320. adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
  321. adj_write_size *= ds_cfg.sizeof_rec[qual];
  322. /* zero out trailing bytes */
  323. memset((char *)index + write_size, 0,
  324. adj_write_size - write_size);
  325. index += adj_write_size;
  326. if (index >= end)
  327. index = base;
  328. ds_set(context->ds, qual, ds_index, index);
  329. if (index >= int_th)
  330. ds_overflow(context, qual);
  331. }
  332. return bytes_written;
  333. }
  334. /*
  335. * Branch Trace Store (BTS) uses the following format. Different
  336. * architectures vary in the size of those fields.
  337. * - source linear address
  338. * - destination linear address
  339. * - flags
  340. *
  341. * Later architectures use 64bit pointers throughout, whereas earlier
  342. * architectures use 32bit pointers in 32bit mode.
  343. *
  344. * We compute the address of each field within a BTS record based on:
  345. * - the field size stored in the DS configuration
  346. * - the relative field position
  347. *
  348. * In order to store additional information in the BTS buffer, we use
  349. * a special source address to indicate that the record requires
  350. * special interpretation.
  351. *
  352. * Netburst indicated via a bit in the flags field whether the branch
  353. * was predicted; this is ignored.
  354. *
  355. * We use two levels of abstraction:
  356. * - the raw data level defined here
  357. * - an arch-independent level defined in ds.h
  358. */
  359. enum bts_field {
  360. bts_from,
  361. bts_to,
  362. bts_flags,
  363. bts_qual = bts_from,
  364. bts_jiffies = bts_to,
  365. bts_pid = bts_flags,
  366. bts_qual_mask = (bts_qual_max - 1),
  367. bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
  368. };
  369. static inline unsigned long bts_get(const char *base, enum bts_field field)
  370. {
  371. base += (ds_cfg.sizeof_field * field);
  372. return *(unsigned long *)base;
  373. }
  374. static inline void bts_set(char *base, enum bts_field field, unsigned long val)
  375. {
  376. base += (ds_cfg.sizeof_field * field);;
  377. (*(unsigned long *)base) = val;
  378. }
  379. /*
  380. * The raw BTS data is architecture dependent.
  381. *
  382. * For higher-level users, we give an arch-independent view.
  383. * - ds.h defines struct bts_struct
  384. * - bts_read translates one raw bts record into a bts_struct
  385. * - bts_write translates one bts_struct into the raw format and
  386. * writes it into the top of the parameter tracer's buffer.
  387. *
  388. * return: bytes read/written on success; -Eerrno, otherwise
  389. */
  390. static int bts_read(struct bts_tracer *tracer, const void *at,
  391. struct bts_struct *out)
  392. {
  393. if (!tracer)
  394. return -EINVAL;
  395. if (at < tracer->trace.ds.begin)
  396. return -EINVAL;
  397. if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
  398. return -EINVAL;
  399. memset(out, 0, sizeof(*out));
  400. if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
  401. out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
  402. out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
  403. out->variant.timestamp.pid = bts_get(at, bts_pid);
  404. } else {
  405. out->qualifier = bts_branch;
  406. out->variant.lbr.from = bts_get(at, bts_from);
  407. out->variant.lbr.to = bts_get(at, bts_to);
  408. if (!out->variant.lbr.from && !out->variant.lbr.to)
  409. out->qualifier = bts_invalid;
  410. }
  411. return ds_cfg.sizeof_rec[ds_bts];
  412. }
  413. static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
  414. {
  415. unsigned char raw[MAX_SIZEOF_BTS];
  416. if (!tracer)
  417. return -EINVAL;
  418. if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
  419. return -EOVERFLOW;
  420. switch (in->qualifier) {
  421. case bts_invalid:
  422. bts_set(raw, bts_from, 0);
  423. bts_set(raw, bts_to, 0);
  424. bts_set(raw, bts_flags, 0);
  425. break;
  426. case bts_branch:
  427. bts_set(raw, bts_from, in->variant.lbr.from);
  428. bts_set(raw, bts_to, in->variant.lbr.to);
  429. bts_set(raw, bts_flags, 0);
  430. break;
  431. case bts_task_arrives:
  432. case bts_task_departs:
  433. bts_set(raw, bts_qual, (bts_escape | in->qualifier));
  434. bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
  435. bts_set(raw, bts_pid, in->variant.timestamp.pid);
  436. break;
  437. default:
  438. return -EINVAL;
  439. }
  440. return ds_write(tracer->ds.context, ds_bts, raw,
  441. ds_cfg.sizeof_rec[ds_bts]);
  442. }
  443. static void ds_write_config(struct ds_context *context,
  444. struct ds_trace *cfg, enum ds_qualifier qual)
  445. {
  446. unsigned char *ds = context->ds;
  447. ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
  448. ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
  449. ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
  450. ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
  451. }
  452. static void ds_read_config(struct ds_context *context,
  453. struct ds_trace *cfg, enum ds_qualifier qual)
  454. {
  455. unsigned char *ds = context->ds;
  456. cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
  457. cfg->top = (void *)ds_get(ds, qual, ds_index);
  458. cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
  459. cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
  460. }
  461. static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
  462. void *base, size_t size, size_t ith,
  463. unsigned int flags) {
  464. unsigned long buffer, adj;
  465. /* adjust the buffer address and size to meet alignment
  466. * constraints:
  467. * - buffer is double-word aligned
  468. * - size is multiple of record size
  469. *
  470. * We checked the size at the very beginning; we have enough
  471. * space to do the adjustment.
  472. */
  473. buffer = (unsigned long)base;
  474. adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
  475. buffer += adj;
  476. size -= adj;
  477. trace->n = size / ds_cfg.sizeof_rec[qual];
  478. trace->size = ds_cfg.sizeof_rec[qual];
  479. size = (trace->n * trace->size);
  480. trace->begin = (void *)buffer;
  481. trace->top = trace->begin;
  482. trace->end = (void *)(buffer + size);
  483. /* The value for 'no threshold' is -1, which will set the
  484. * threshold outside of the buffer, just like we want it.
  485. */
  486. trace->ith = (void *)(buffer + size - ith);
  487. trace->flags = flags;
  488. }
  489. static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
  490. enum ds_qualifier qual, struct task_struct *task,
  491. void *base, size_t size, size_t th, unsigned int flags)
  492. {
  493. struct ds_context *context;
  494. int error;
  495. error = -EINVAL;
  496. if (!base)
  497. goto out;
  498. /* we require some space to do alignment adjustments below */
  499. error = -EINVAL;
  500. if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
  501. goto out;
  502. if (th != (size_t)-1) {
  503. th *= ds_cfg.sizeof_rec[qual];
  504. error = -EINVAL;
  505. if (size <= th)
  506. goto out;
  507. }
  508. tracer->buffer = base;
  509. tracer->size = size;
  510. error = -ENOMEM;
  511. context = ds_get_context(task);
  512. if (!context)
  513. goto out;
  514. tracer->context = context;
  515. ds_init_ds_trace(trace, qual, base, size, th, flags);
  516. error = 0;
  517. out:
  518. return error;
  519. }
  520. struct bts_tracer *ds_request_bts(struct task_struct *task,
  521. void *base, size_t size,
  522. bts_ovfl_callback_t ovfl, size_t th,
  523. unsigned int flags)
  524. {
  525. struct bts_tracer *tracer;
  526. unsigned long irq;
  527. int error;
  528. error = -EOPNOTSUPP;
  529. if (!ds_cfg.ctl[dsf_bts])
  530. goto out;
  531. /* buffer overflow notification is not yet implemented */
  532. error = -EOPNOTSUPP;
  533. if (ovfl)
  534. goto out;
  535. error = -ENOMEM;
  536. tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
  537. if (!tracer)
  538. goto out;
  539. tracer->ovfl = ovfl;
  540. error = ds_request(&tracer->ds, &tracer->trace.ds,
  541. ds_bts, task, base, size, th, flags);
  542. if (error < 0)
  543. goto out_tracer;
  544. spin_lock_irqsave(&ds_lock, irq);
  545. error = -EPERM;
  546. if (!check_tracer(task))
  547. goto out_unlock;
  548. get_tracer(task);
  549. error = -EPERM;
  550. if (tracer->ds.context->bts_master)
  551. goto out_put_tracer;
  552. tracer->ds.context->bts_master = tracer;
  553. spin_unlock_irqrestore(&ds_lock, irq);
  554. tracer->trace.read = bts_read;
  555. tracer->trace.write = bts_write;
  556. ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
  557. ds_resume_bts(tracer);
  558. return tracer;
  559. out_put_tracer:
  560. put_tracer(task);
  561. out_unlock:
  562. spin_unlock_irqrestore(&ds_lock, irq);
  563. ds_put_context(tracer->ds.context);
  564. out_tracer:
  565. kfree(tracer);
  566. out:
  567. return ERR_PTR(error);
  568. }
  569. struct pebs_tracer *ds_request_pebs(struct task_struct *task,
  570. void *base, size_t size,
  571. pebs_ovfl_callback_t ovfl, size_t th,
  572. unsigned int flags)
  573. {
  574. struct pebs_tracer *tracer;
  575. unsigned long irq;
  576. int error;
  577. /* buffer overflow notification is not yet implemented */
  578. error = -EOPNOTSUPP;
  579. if (ovfl)
  580. goto out;
  581. error = -ENOMEM;
  582. tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
  583. if (!tracer)
  584. goto out;
  585. tracer->ovfl = ovfl;
  586. error = ds_request(&tracer->ds, &tracer->trace.ds,
  587. ds_pebs, task, base, size, th, flags);
  588. if (error < 0)
  589. goto out_tracer;
  590. spin_lock_irqsave(&ds_lock, irq);
  591. error = -EPERM;
  592. if (!check_tracer(task))
  593. goto out_unlock;
  594. get_tracer(task);
  595. error = -EPERM;
  596. if (tracer->ds.context->pebs_master)
  597. goto out_put_tracer;
  598. tracer->ds.context->pebs_master = tracer;
  599. spin_unlock_irqrestore(&ds_lock, irq);
  600. ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
  601. ds_resume_pebs(tracer);
  602. return tracer;
  603. out_put_tracer:
  604. put_tracer(task);
  605. out_unlock:
  606. spin_unlock_irqrestore(&ds_lock, irq);
  607. ds_put_context(tracer->ds.context);
  608. out_tracer:
  609. kfree(tracer);
  610. out:
  611. return ERR_PTR(error);
  612. }
  613. void ds_release_bts(struct bts_tracer *tracer)
  614. {
  615. if (!tracer)
  616. return;
  617. ds_suspend_bts(tracer);
  618. WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
  619. tracer->ds.context->bts_master = NULL;
  620. put_tracer(tracer->ds.context->task);
  621. ds_put_context(tracer->ds.context);
  622. kfree(tracer);
  623. }
  624. void ds_suspend_bts(struct bts_tracer *tracer)
  625. {
  626. struct task_struct *task;
  627. if (!tracer)
  628. return;
  629. task = tracer->ds.context->task;
  630. if (!task || (task == current))
  631. update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
  632. if (task) {
  633. task->thread.debugctlmsr &= ~BTS_CONTROL;
  634. if (!task->thread.debugctlmsr)
  635. clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
  636. }
  637. }
  638. void ds_resume_bts(struct bts_tracer *tracer)
  639. {
  640. struct task_struct *task;
  641. unsigned long control;
  642. if (!tracer)
  643. return;
  644. task = tracer->ds.context->task;
  645. control = ds_cfg.ctl[dsf_bts];
  646. if (!(tracer->trace.ds.flags & BTS_KERNEL))
  647. control |= ds_cfg.ctl[dsf_bts_kernel];
  648. if (!(tracer->trace.ds.flags & BTS_USER))
  649. control |= ds_cfg.ctl[dsf_bts_user];
  650. if (task) {
  651. task->thread.debugctlmsr |= control;
  652. set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
  653. }
  654. if (!task || (task == current))
  655. update_debugctlmsr(get_debugctlmsr() | control);
  656. }
  657. void ds_release_pebs(struct pebs_tracer *tracer)
  658. {
  659. if (!tracer)
  660. return;
  661. ds_suspend_pebs(tracer);
  662. WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
  663. tracer->ds.context->pebs_master = NULL;
  664. put_tracer(tracer->ds.context->task);
  665. ds_put_context(tracer->ds.context);
  666. kfree(tracer);
  667. }
  /* Suspend PEBS tracing; currently a no-op. */
  void ds_suspend_pebs(struct pebs_tracer *tracer)
  {
      /* nothing to do */
  }
  /* Resume PEBS tracing; currently a no-op. */
  void ds_resume_pebs(struct pebs_tracer *tracer)
  {
      /* nothing to do */
  }
  674. const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
  675. {
  676. if (!tracer)
  677. return NULL;
  678. ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
  679. return &tracer->trace;
  680. }
  681. const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
  682. {
  683. if (!tracer)
  684. return NULL;
  685. ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
  686. tracer->trace.reset_value =
  687. *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
  688. return &tracer->trace;
  689. }
  690. int ds_reset_bts(struct bts_tracer *tracer)
  691. {
  692. if (!tracer)
  693. return -EINVAL;
  694. tracer->trace.ds.top = tracer->trace.ds.begin;
  695. ds_set(tracer->ds.context->ds, ds_bts, ds_index,
  696. (unsigned long)tracer->trace.ds.top);
  697. return 0;
  698. }
  699. int ds_reset_pebs(struct pebs_tracer *tracer)
  700. {
  701. if (!tracer)
  702. return -EINVAL;
  703. tracer->trace.ds.top = tracer->trace.ds.begin;
  704. ds_set(tracer->ds.context->ds, ds_bts, ds_index,
  705. (unsigned long)tracer->trace.ds.top);
  706. return 0;
  707. }
  708. int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
  709. {
  710. if (!tracer)
  711. return -EINVAL;
  712. *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
  713. return 0;
  714. }
  715. static const struct ds_configuration ds_cfg_netburst = {
  716. .name = "Netburst",
  717. .ctl[dsf_bts] = (1 << 2) | (1 << 3),
  718. .ctl[dsf_bts_kernel] = (1 << 5),
  719. .ctl[dsf_bts_user] = (1 << 6),
  720. .sizeof_field = sizeof(long),
  721. .sizeof_rec[ds_bts] = sizeof(long) * 3,
  722. #ifdef __i386__
  723. .sizeof_rec[ds_pebs] = sizeof(long) * 10,
  724. #else
  725. .sizeof_rec[ds_pebs] = sizeof(long) * 18,
  726. #endif
  727. };
  728. static const struct ds_configuration ds_cfg_pentium_m = {
  729. .name = "Pentium M",
  730. .ctl[dsf_bts] = (1 << 6) | (1 << 7),
  731. .sizeof_field = sizeof(long),
  732. .sizeof_rec[ds_bts] = sizeof(long) * 3,
  733. #ifdef __i386__
  734. .sizeof_rec[ds_pebs] = sizeof(long) * 10,
  735. #else
  736. .sizeof_rec[ds_pebs] = sizeof(long) * 18,
  737. #endif
  738. };
  739. static const struct ds_configuration ds_cfg_core2_atom = {
  740. .name = "Core 2/Atom",
  741. .ctl[dsf_bts] = (1 << 6) | (1 << 7),
  742. .ctl[dsf_bts_kernel] = (1 << 9),
  743. .ctl[dsf_bts_user] = (1 << 10),
  744. .sizeof_field = 8,
  745. .sizeof_rec[ds_bts] = 8 * 3,
  746. .sizeof_rec[ds_pebs] = 8 * 18,
  747. };
  748. static void
  749. ds_configure(const struct ds_configuration *cfg)
  750. {
  751. memset(&ds_cfg, 0, sizeof(ds_cfg));
  752. ds_cfg = *cfg;
  753. printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
  754. if (!cpu_has_bts) {
  755. ds_cfg.ctl[dsf_bts] = 0;
  756. printk(KERN_INFO "[ds] bts not available\n");
  757. }
  758. if (!cpu_has_pebs)
  759. printk(KERN_INFO "[ds] pebs not available\n");
  760. WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
  761. }
  762. void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
  763. {
  764. switch (c->x86) {
  765. case 0x6:
  766. switch (c->x86_model) {
  767. case 0x9:
  768. case 0xd: /* Pentium M */
  769. ds_configure(&ds_cfg_pentium_m);
  770. break;
  771. case 0xf:
  772. case 0x17: /* Core2 */
  773. case 0x1c: /* Atom */
  774. ds_configure(&ds_cfg_core2_atom);
  775. break;
  776. case 0x1a: /* i7 */
  777. default:
  778. /* sorry, don't know about them */
  779. break;
  780. }
  781. break;
  782. case 0xf:
  783. switch (c->x86_model) {
  784. case 0x0:
  785. case 0x1:
  786. case 0x2: /* Netburst */
  787. ds_configure(&ds_cfg_netburst);
  788. break;
  789. default:
  790. /* sorry, don't know about them */
  791. break;
  792. }
  793. break;
  794. default:
  795. /* sorry, don't know about them */
  796. break;
  797. }
  798. }
  799. /*
  800. * Change the DS configuration from tracing prev to tracing next.
  801. */
  802. void ds_switch_to(struct task_struct *prev, struct task_struct *next)
  803. {
  804. struct ds_context *prev_ctx = prev->thread.ds_ctx;
  805. struct ds_context *next_ctx = next->thread.ds_ctx;
  806. if (prev_ctx) {
  807. update_debugctlmsr(0);
  808. if (prev_ctx->bts_master &&
  809. (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
  810. struct bts_struct ts = {
  811. .qualifier = bts_task_departs,
  812. .variant.timestamp.jiffies = jiffies_64,
  813. .variant.timestamp.pid = prev->pid
  814. };
  815. bts_write(prev_ctx->bts_master, &ts);
  816. }
  817. }
  818. if (next_ctx) {
  819. if (next_ctx->bts_master &&
  820. (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
  821. struct bts_struct ts = {
  822. .qualifier = bts_task_arrives,
  823. .variant.timestamp.jiffies = jiffies_64,
  824. .variant.timestamp.pid = next->pid
  825. };
  826. bts_write(next_ctx->bts_master, &ts);
  827. }
  828. wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
  829. }
  830. update_debugctlmsr(next->thread.debugctlmsr);
  831. }
  832. void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
  833. {
  834. clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
  835. tsk->thread.ds_ctx = NULL;
  836. }
  /* Hook called when tsk exits; currently a no-op. */
  void ds_exit_thread(struct task_struct *tsk)
  {
      /* nothing to do */
  }