/*
 * Debug Store support
 *
 * This provides a low-level interface to the hardware's Debug Store
 * feature that is used for branch trace store (BTS) and
 * precise event-based sampling (PEBS).
 *
 * It manages:
 * - DS and BTS hardware configuration
 * - buffer overflow handling (to be done)
 * - buffer access
 *
 * It does not do:
 * - security checking (is the caller allowed to trace the task)
 * - buffer allocation (memory accounting)
 *
 *
 * Copyright (C) 2007-2009 Intel Corporation.
 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
 */
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>

#include <asm/ds.h>

#include "ds_selftest.h"

/*
 * The configuration for a particular DS hardware implementation:
 */
struct ds_configuration {
        /* The name of the configuration: */
        const char *name;

        /* The size of pointer-typed fields in DS, BTS, and PEBS: */
        unsigned char sizeof_ptr_field;

        /* The size of a BTS/PEBS record in bytes: */
        unsigned char sizeof_rec[2];

        /* Control bit-masks indexed by enum ds_feature: */
        unsigned long ctl[dsf_ctl_max];
};
static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);

#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())

/* Maximal size of a DS configuration: */
#define MAX_SIZEOF_DS (12 * 8)

/* Maximal size of a BTS record: */
#define MAX_SIZEOF_BTS (3 * 8)

/* BTS and PEBS buffer alignment: */
#define DS_ALIGNMENT (1 << 3)

/* Mask of control bits in the DS MSR register: */
#define BTS_CONTROL \
        (ds_cfg.ctl[dsf_bts] | \
         ds_cfg.ctl[dsf_bts_kernel] | \
         ds_cfg.ctl[dsf_bts_user] | \
         ds_cfg.ctl[dsf_bts_overflow])

/*
 * A BTS or PEBS tracer.
 *
 * This holds the configuration of the tracer and serves as a handle
 * to identify tracers.
 */
struct ds_tracer {
        /* The DS context (partially) owned by this tracer. */
        struct ds_context *context;
        /* The buffer provided on ds_request() and its size in bytes. */
        void *buffer;
        size_t size;
};

struct bts_tracer {
        /* The common DS part: */
        struct ds_tracer ds;

        /* The trace including the DS configuration: */
        struct bts_trace trace;

        /* Buffer overflow notification function: */
        bts_ovfl_callback_t ovfl;

        /* Active flags affecting trace collection. */
        unsigned int flags;
};

struct pebs_tracer {
        /* The common DS part: */
        struct ds_tracer ds;

        /* The trace including the DS configuration: */
        struct pebs_trace trace;

        /* Buffer overflow notification function: */
        pebs_ovfl_callback_t ovfl;
};

/*
 * Debug Store (DS) save area configuration (see Intel 64 and IA-32
 * Architectures Software Developer's Manual, section 18.5)
 *
 * The DS configuration consists of the following fields; different
 * architectures vary in the size of those fields.
 *
 * - double-word aligned base linear address of the BTS buffer
 * - write pointer into the BTS buffer
 * - end linear address of the BTS buffer (one byte beyond the end of
 *   the buffer)
 * - interrupt pointer into BTS buffer
 *   (interrupt occurs when write pointer passes interrupt pointer)
 * - double-word aligned base linear address of the PEBS buffer
 * - write pointer into the PEBS buffer
 * - end linear address of the PEBS buffer (one byte beyond the end of
 *   the buffer)
 * - interrupt pointer into PEBS buffer
 *   (interrupt occurs when write pointer passes interrupt pointer)
 * - value to which counter is reset following counter overflow
 *
 * Later architectures use 64bit pointers throughout, whereas earlier
 * architectures use 32bit pointers in 32bit mode.
 *
 *
 * We compute the base address for the first 8 fields based on:
 * - the field size stored in the DS configuration
 * - the relative field position
 * - an offset giving the start of the respective region
 *
 * This offset is further used to index various arrays holding
 * information for BTS and PEBS at the respective index.
 *
 * On later 32bit processors, we only access the lower 32bit of the
 * 64bit pointer fields. The upper halves will be zeroed out.
 */
enum ds_field {
        ds_buffer_base = 0,
        ds_index,
        ds_absolute_maximum,
        ds_interrupt_threshold,
};

enum ds_qualifier {
        ds_bts = 0,
        ds_pebs
};

static inline unsigned long
ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
{
        base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
        return *(unsigned long *)base;
}

static inline void
ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
       unsigned long value)
{
        base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
        (*(unsigned long *)base) = value;
}

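/*
 * A worked example of the addressing scheme above (an illustration,
 * not extra functionality): with 8-byte pointer fields, the PEBS
 * write pointer lives at
 *
 *   ds + 8 * (ds_index + 4 * ds_pebs) == ds + 8 * (1 + 4) == ds + 40
 *
 * i.e. the four BTS fields come first, followed by the four PEBS
 * fields in the same order.
 */
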
/*
 * Locking is done only for allocating BTS or PEBS resources.
 */
static DEFINE_SPINLOCK(ds_lock);

/*
 * We either support (system-wide) per-cpu or per-thread allocation.
 * We distinguish the two based on the task_struct pointer, where a
 * NULL pointer indicates per-cpu allocation for the current cpu.
 *
 * Allocations are use-counted. As soon as resources are allocated,
 * further allocations must be of the same type (per-cpu or
 * per-thread). We model this by counting allocations (i.e. the number
 * of tracers of a certain type) for one type negatively:
 *   =0  no tracers
 *   >0  number of per-thread tracers
 *   <0  number of per-cpu tracers
 *
 * The tracer count essentially gives the number of ds contexts for a
 * certain type of allocation.
 */
static atomic_t tracers = ATOMIC_INIT(0);

static inline int get_tracer(struct task_struct *task)
{
        int error;

        spin_lock_irq(&ds_lock);

        if (task) {
                error = -EPERM;
                if (atomic_read(&tracers) < 0)
                        goto out;
                atomic_inc(&tracers);
        } else {
                error = -EPERM;
                if (atomic_read(&tracers) > 0)
                        goto out;
                atomic_dec(&tracers);
        }

        error = 0;
out:
        spin_unlock_irq(&ds_lock);
        return error;
}

static inline void put_tracer(struct task_struct *task)
{
        if (task)
                atomic_dec(&tracers);
        else
                atomic_inc(&tracers);
}

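/*
 * For example (illustration only): after two successful per-thread
 * get_tracer() calls, tracers == 2; a subsequent per-cpu request
 * (task == NULL) then fails with -EPERM until both per-thread
 * tracers have called put_tracer().
 */
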
/*
 * The DS context is either attached to a thread or to a cpu:
 * - in the former case, the thread_struct contains a pointer to the
 *   attached context.
 * - in the latter case, we use a static array of per-cpu context
 *   pointers.
 *
 * Contexts are use-counted. They are allocated on first access and
 * deallocated when the last user puts the context.
 */
struct ds_context {
        /* The DS configuration; goes into MSR_IA32_DS_AREA: */
        unsigned char ds[MAX_SIZEOF_DS];

        /* The owner of the BTS and PEBS configuration, respectively: */
        struct bts_tracer *bts_master;
        struct pebs_tracer *pebs_master;

        /* Use count: */
        unsigned long count;

        /* Pointer to the context pointer field: */
        struct ds_context **this;

        /* The traced task; NULL for current cpu: */
        struct task_struct *task;
};

static DEFINE_PER_CPU(struct ds_context *, system_context_array);

#define system_context per_cpu(system_context_array, smp_processor_id())

static inline struct ds_context *ds_get_context(struct task_struct *task)
{
        struct ds_context **p_context =
                (task ? &task->thread.ds_ctx : &system_context);
        struct ds_context *context = NULL;
        struct ds_context *new_context = NULL;
        unsigned long irq;

        /*
         * Chances are small that we already have a context.
         *
         * Contexts for per-cpu tracing are allocated using
         * smp_call_function(). We must not sleep.
         */
        new_context = kzalloc(sizeof(*new_context), GFP_ATOMIC);
        if (!new_context)
                return NULL;

        spin_lock_irqsave(&ds_lock, irq);

        context = *p_context;
        if (!context) {
                context = new_context;

                context->this = p_context;
                context->task = task;
                context->count = 0;

                if (task)
                        set_tsk_thread_flag(task, TIF_DS_AREA_MSR);

                if (!task || (task == current))
                        wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);

                *p_context = context;
        }

        context->count++;

        spin_unlock_irqrestore(&ds_lock, irq);

        if (context != new_context)
                kfree(new_context);

        return context;
}

static inline void ds_put_context(struct ds_context *context)
{
        struct task_struct *task;
        unsigned long irq;

        if (!context)
                return;

        spin_lock_irqsave(&ds_lock, irq);

        if (--context->count) {
                spin_unlock_irqrestore(&ds_lock, irq);
                return;
        }

        *(context->this) = NULL;

        task = context->task;

        if (task)
                clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);

        if (!task || (task == current))
                wrmsrl(MSR_IA32_DS_AREA, 0);

        spin_unlock_irqrestore(&ds_lock, irq);

        /* The context might still be in use for context switching. */
        if (task && (task != current))
                wait_task_context_switch(task);

        kfree(context);
}

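/*
 * Lifecycle illustration (not additional code): the first
 * ds_get_context() for a task allocates the context and points
 * MSR_IA32_DS_AREA at its DS save area; a second tracer for the
 * same task merely bumps context->count. The final ds_put_context()
 * clears the MSR, waits until the task can no longer be in the
 * middle of a context switch, and frees the context.
 */
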
/*
 * Call the tracer's callback on a buffer overflow.
 *
 * context: the ds context
 * qual:    the buffer type
 */
static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
{
        switch (qual) {
        case ds_bts:
                if (context->bts_master &&
                    context->bts_master->ovfl)
                        context->bts_master->ovfl(context->bts_master);
                break;
        case ds_pebs:
                if (context->pebs_master &&
                    context->pebs_master->ovfl)
                        context->pebs_master->ovfl(context->pebs_master);
                break;
        }
}

/*
 * Write raw data into the BTS or PEBS buffer.
 *
 * The remainder of any partially written record is zeroed out.
 *
 * context: the DS context
 * qual:    the buffer type
 * record:  the data to write
 * size:    the size of the data
 */
static int ds_write(struct ds_context *context, enum ds_qualifier qual,
                    const void *record, size_t size)
{
        int bytes_written = 0;

        if (!record)
                return -EINVAL;

        while (size) {
                unsigned long base, index, end, write_end, int_th;
                unsigned long write_size, adj_write_size;

                /*
                 * Write as much as possible without producing an
                 * overflow interrupt.
                 *
                 * Interrupt_threshold must either be
                 * - bigger than absolute_maximum or
                 * - point to a record between buffer_base and
                 *   absolute_maximum
                 *
                 * Index points to a valid record.
                 */
                base = ds_get(context->ds, qual, ds_buffer_base);
                index = ds_get(context->ds, qual, ds_index);
                end = ds_get(context->ds, qual, ds_absolute_maximum);
                int_th = ds_get(context->ds, qual, ds_interrupt_threshold);

                write_end = min(end, int_th);

                /*
                 * If we are already beyond the interrupt threshold,
                 * we fill the entire buffer.
                 */
                if (write_end <= index)
                        write_end = end;

                if (write_end <= index)
                        break;

                write_size = min((unsigned long) size, write_end - index);
                memcpy((void *)index, record, write_size);

                record = (const char *)record + write_size;
                size -= write_size;
                bytes_written += write_size;
                /*
                 * Round the write size up to a full record, matching
                 * the documented contract above: the remainder of a
                 * partially written record is zeroed out.
                 */
                adj_write_size = DIV_ROUND_UP(write_size,
                                              ds_cfg.sizeof_rec[qual]);
                adj_write_size *= ds_cfg.sizeof_rec[qual];

                /* Zero out trailing bytes. */
                memset((char *)index + write_size, 0,
                       adj_write_size - write_size);
                index += adj_write_size;

                if (index >= end)
                        index = base;
                ds_set(context->ds, qual, ds_index, index);

                if (index >= int_th)
                        ds_overflow(context, qual);
        }

        return bytes_written;
}

/*
 * Branch Trace Store (BTS) uses the following format. Different
 * architectures vary in the size of those fields.
 * - source linear address
 * - destination linear address
 * - flags
 *
 * Later architectures use 64bit pointers throughout, whereas earlier
 * architectures use 32bit pointers in 32bit mode.
 *
 * We compute the base address for the fields based on:
 * - the field size stored in the DS configuration
 * - the relative field position
 *
 * In order to store additional information in the BTS buffer, we use
 * a special source address to indicate that the record requires
 * special interpretation.
 *
 * On Netburst, a bit in the flags field indicates whether the branch
 * was predicted; we ignore it.
 *
 * We use two levels of abstraction:
 * - the raw data level defined here
 * - an arch-independent level defined in ds.h
 */
enum bts_field {
        bts_from,
        bts_to,
        bts_flags,

        bts_qual = bts_from,
        bts_jiffies = bts_to,
        bts_pid = bts_flags,

        bts_qual_mask = (bts_qual_max - 1),
        bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
};

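/*
 * Escape-record illustration (no new functionality): a timestamp
 * record as produced by bts_write() below stores
 *
 *   from  = bts_escape | qualifier  (e.g. bts_task_arrives)
 *   to    = jiffies
 *   flags = pid
 *
 * bts_read() recognizes such records by the bts_escape pattern in
 * the source-address field.
 */
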
static inline unsigned long bts_get(const char *base, enum bts_field field)
{
        base += (ds_cfg.sizeof_ptr_field * field);
        return *(unsigned long *)base;
}

static inline void bts_set(char *base, enum bts_field field, unsigned long val)
{
        base += (ds_cfg.sizeof_ptr_field * field);
        (*(unsigned long *)base) = val;
}

/*
 * The raw BTS data is architecture dependent.
 *
 * For higher-level users, we give an arch-independent view.
 * - ds.h defines struct bts_struct
 * - bts_read translates one raw bts record into a bts_struct
 * - bts_write translates one bts_struct into the raw format and
 *   writes it into the top of the parameter tracer's buffer.
 *
 * return: bytes read/written on success; -Eerrno, otherwise
 */
static int
bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
{
        if (!tracer)
                return -EINVAL;

        if (at < tracer->trace.ds.begin)
                return -EINVAL;

        if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
                return -EINVAL;

        memset(out, 0, sizeof(*out));
        if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
                out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
                out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
                out->variant.timestamp.pid = bts_get(at, bts_pid);
        } else {
                out->qualifier = bts_branch;
                out->variant.lbr.from = bts_get(at, bts_from);
                out->variant.lbr.to = bts_get(at, bts_to);

                if (!out->variant.lbr.from && !out->variant.lbr.to)
                        out->qualifier = bts_invalid;
        }

        return ds_cfg.sizeof_rec[ds_bts];
}

static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
{
        unsigned char raw[MAX_SIZEOF_BTS];

        if (!tracer)
                return -EINVAL;

        if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
                return -EOVERFLOW;

        switch (in->qualifier) {
        case bts_invalid:
                bts_set(raw, bts_from, 0);
                bts_set(raw, bts_to, 0);
                bts_set(raw, bts_flags, 0);
                break;
        case bts_branch:
                bts_set(raw, bts_from, in->variant.lbr.from);
                bts_set(raw, bts_to, in->variant.lbr.to);
                bts_set(raw, bts_flags, 0);
                break;
        case bts_task_arrives:
        case bts_task_departs:
                bts_set(raw, bts_qual, (bts_escape | in->qualifier));
                bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
                bts_set(raw, bts_pid, in->variant.timestamp.pid);
                break;
        default:
                return -EINVAL;
        }

        return ds_write(tracer->ds.context, ds_bts, raw,
                        ds_cfg.sizeof_rec[ds_bts]);
}

static void ds_write_config(struct ds_context *context,
                            struct ds_trace *cfg, enum ds_qualifier qual)
{
        unsigned char *ds = context->ds;

        ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
        ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
        ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
        ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
}

static void ds_read_config(struct ds_context *context,
                           struct ds_trace *cfg, enum ds_qualifier qual)
{
        unsigned char *ds = context->ds;

        cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
        cfg->top = (void *)ds_get(ds, qual, ds_index);
        cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
        cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
}

static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
                             void *base, size_t size, size_t ith,
                             unsigned int flags)
{
        unsigned long buffer, adj;

        /*
         * Adjust the buffer address and size to meet alignment
         * constraints:
         * - buffer is double-word aligned
         * - size is multiple of record size
         *
         * We checked the size at the very beginning; we have enough
         * space to do the adjustment.
         */
        buffer = (unsigned long)base;

        adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
        buffer += adj;
        size -= adj;

        trace->n = size / ds_cfg.sizeof_rec[qual];
        trace->size = ds_cfg.sizeof_rec[qual];

        size = (trace->n * trace->size);

        trace->begin = (void *)buffer;
        trace->top = trace->begin;
        trace->end = (void *)(buffer + size);
        /*
         * The value for 'no threshold' is -1, which will set the
         * threshold outside of the buffer, just like we want it.
         */
        trace->ith = (void *)(buffer + size - ith);

        trace->flags = flags;
}

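/*
 * Sizing illustration (the numbers are an example, not a
 * requirement): a 4096-byte buffer that is already 8-byte aligned
 * and holds 24-byte BTS records yields
 *
 *   n = 4096 / 24 = 170 records, usable size = 170 * 24 = 4080 bytes
 *
 * with the last 16 bytes of the caller's buffer left unused.
 */
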
static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
                      enum ds_qualifier qual, struct task_struct *task,
                      void *base, size_t size, size_t th, unsigned int flags)
{
        struct ds_context *context;
        int error;

        error = -EOPNOTSUPP;
        if (!ds_cfg.sizeof_rec[qual])
                goto out;

        error = -EINVAL;
        if (!base)
                goto out;

        /* We require some space to do alignment adjustments below. */
        error = -EINVAL;
        if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
                goto out;

        if (th != (size_t)-1) {
                th *= ds_cfg.sizeof_rec[qual];

                error = -EINVAL;
                if (size <= th)
                        goto out;
        }

        tracer->buffer = base;
        tracer->size = size;

        error = -ENOMEM;
        context = ds_get_context(task);
        if (!context)
                goto out;
        tracer->context = context;

        ds_init_ds_trace(trace, qual, base, size, th, flags);

        error = 0;
 out:
        return error;
}

struct bts_tracer *ds_request_bts(struct task_struct *task,
                                  void *base, size_t size,
                                  bts_ovfl_callback_t ovfl, size_t th,
                                  unsigned int flags)
{
        struct bts_tracer *tracer;
        unsigned long irq;
        int error;

        /* Buffer overflow notification is not yet implemented. */
        error = -EOPNOTSUPP;
        if (ovfl)
                goto out;

        error = get_tracer(task);
        if (error < 0)
                goto out;

        /*
         * Per-cpu tracing is typically requested using
         * smp_call_function(). We must not sleep.
         */
        error = -ENOMEM;
        tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC);
        if (!tracer)
                goto out_put_tracer;
        tracer->ovfl = ovfl;

        error = ds_request(&tracer->ds, &tracer->trace.ds,
                           ds_bts, task, base, size, th, flags);
        if (error < 0)
                goto out_tracer;

        spin_lock_irqsave(&ds_lock, irq);

        error = -EPERM;
        if (tracer->ds.context->bts_master)
                goto out_unlock;
        tracer->ds.context->bts_master = tracer;

        spin_unlock_irqrestore(&ds_lock, irq);

        tracer->trace.read = bts_read;
        tracer->trace.write = bts_write;

        ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
        ds_resume_bts(tracer);

        return tracer;

 out_unlock:
        spin_unlock_irqrestore(&ds_lock, irq);
        ds_put_context(tracer->ds.context);
 out_tracer:
        kfree(tracer);
 out_put_tracer:
        put_tracer(task);
 out:
        return ERR_PTR(error);
}

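/*
 * Caller sketch (illustration only; "buf" and its size are made up):
 *
 *	struct bts_tracer *t;
 *
 *	t = ds_request_bts(current, buf, PAGE_SIZE, NULL,
 *			   (size_t)-1, BTS_KERNEL | BTS_USER);
 *	if (IS_ERR(t))
 *		return PTR_ERR(t);
 *
 * The NULL/-1 arguments request no overflow callback and no
 * interrupt threshold; tracing is running on return. Pair with
 * ds_release_bts() when done.
 */
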
struct pebs_tracer *ds_request_pebs(struct task_struct *task,
                                    void *base, size_t size,
                                    pebs_ovfl_callback_t ovfl, size_t th,
                                    unsigned int flags)
{
        struct pebs_tracer *tracer;
        unsigned long irq;
        int error;

        /* Buffer overflow notification is not yet implemented. */
        error = -EOPNOTSUPP;
        if (ovfl)
                goto out;

        error = get_tracer(task);
        if (error < 0)
                goto out;

        /*
         * Per-cpu tracing is typically requested using
         * smp_call_function(). We must not sleep.
         */
        error = -ENOMEM;
        tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC);
        if (!tracer)
                goto out_put_tracer;
        tracer->ovfl = ovfl;

        error = ds_request(&tracer->ds, &tracer->trace.ds,
                           ds_pebs, task, base, size, th, flags);
        if (error < 0)
                goto out_tracer;

        spin_lock_irqsave(&ds_lock, irq);

        error = -EPERM;
        if (tracer->ds.context->pebs_master)
                goto out_unlock;
        tracer->ds.context->pebs_master = tracer;

        spin_unlock_irqrestore(&ds_lock, irq);

        ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
        ds_resume_pebs(tracer);

        return tracer;

 out_unlock:
        spin_unlock_irqrestore(&ds_lock, irq);
        ds_put_context(tracer->ds.context);
 out_tracer:
        kfree(tracer);
 out_put_tracer:
        put_tracer(task);
 out:
        return ERR_PTR(error);
}

void ds_release_bts(struct bts_tracer *tracer)
{
        struct task_struct *task;

        if (!tracer)
                return;

        task = tracer->ds.context->task;

        ds_suspend_bts(tracer);

        WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
        tracer->ds.context->bts_master = NULL;

        /* Make sure tracing stopped and the tracer is not in use. */
        if (task && (task != current))
                wait_task_context_switch(task);

        ds_put_context(tracer->ds.context);
        put_tracer(task);

        kfree(tracer);
}

void ds_suspend_bts(struct bts_tracer *tracer)
{
        struct task_struct *task;

        if (!tracer)
                return;

        tracer->flags = 0;

        task = tracer->ds.context->task;

        if (!task || (task == current))
                update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);

        if (task) {
                task->thread.debugctlmsr &= ~BTS_CONTROL;

                if (!task->thread.debugctlmsr)
                        clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
        }
}

void ds_resume_bts(struct bts_tracer *tracer)
{
        struct task_struct *task;
        unsigned long control;

        if (!tracer)
                return;

        tracer->flags = tracer->trace.ds.flags;

        task = tracer->ds.context->task;

        control = ds_cfg.ctl[dsf_bts];

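        /*
         * Note that dsf_bts_kernel and dsf_bts_user are suppress
         * bits: setting one disables branch recording for that
         * privilege level. We therefore set each bit only when the
         * corresponding BTS_* flag was NOT requested.
         */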
        if (!(tracer->trace.ds.flags & BTS_KERNEL))
                control |= ds_cfg.ctl[dsf_bts_kernel];
        if (!(tracer->trace.ds.flags & BTS_USER))
                control |= ds_cfg.ctl[dsf_bts_user];

        if (task) {
                task->thread.debugctlmsr |= control;
                set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
        }

        if (!task || (task == current))
                update_debugctlmsr(get_debugctlmsr() | control);
}

void ds_release_pebs(struct pebs_tracer *tracer)
{
        struct task_struct *task;

        if (!tracer)
                return;

        task = tracer->ds.context->task;

        ds_suspend_pebs(tracer);

        WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
        tracer->ds.context->pebs_master = NULL;

        ds_put_context(tracer->ds.context);
        put_tracer(task);

        kfree(tracer);
}

void ds_suspend_pebs(struct pebs_tracer *tracer)
{
        /* Not implemented, yet. */
}

void ds_resume_pebs(struct pebs_tracer *tracer)
{
        /* Not implemented, yet. */
}

const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
{
        if (!tracer)
                return NULL;

        ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
        return &tracer->trace;
}

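/*
 * The PEBS counter-reset value is the 9th DS field (index 8): it
 * follows the two groups of four pointer-sized BTS and PEBS fields
 * described in the DS layout comment above, hence the
 * "sizeof_ptr_field * 8" offset used below and in
 * ds_set_pebs_reset().
 */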
const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
{
        if (!tracer)
                return NULL;

        ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
        tracer->trace.reset_value =
                *(u64 *)(tracer->ds.context->ds +
                         (ds_cfg.sizeof_ptr_field * 8));

        return &tracer->trace;
}

int ds_reset_bts(struct bts_tracer *tracer)
{
        if (!tracer)
                return -EINVAL;

        tracer->trace.ds.top = tracer->trace.ds.begin;

        ds_set(tracer->ds.context->ds, ds_bts, ds_index,
               (unsigned long)tracer->trace.ds.top);

        return 0;
}

int ds_reset_pebs(struct pebs_tracer *tracer)
{
        if (!tracer)
                return -EINVAL;

        tracer->trace.ds.top = tracer->trace.ds.begin;

        ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
               (unsigned long)tracer->trace.ds.top);

        return 0;
}

int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
{
        if (!tracer)
                return -EINVAL;

        *(u64 *)(tracer->ds.context->ds +
                 (ds_cfg.sizeof_ptr_field * 8)) = value;

        return 0;
}

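/*
 * The ctl[] masks below select bits in the respective DEBUGCTL MSR
 * (dsf_bts enables branch tracing and storing; dsf_bts_kernel and
 * dsf_bts_user suppress kernel- resp. user-mode branches). The
 * exact bit positions differ per micro-architecture; see the
 * processor's Software Developer's Manual for the authoritative
 * layout.
 */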
static const struct ds_configuration ds_cfg_netburst = {
        .name = "Netburst",
        .ctl[dsf_bts] = (1 << 2) | (1 << 3),
        .ctl[dsf_bts_kernel] = (1 << 5),
        .ctl[dsf_bts_user] = (1 << 6),
};
static const struct ds_configuration ds_cfg_pentium_m = {
        .name = "Pentium M",
        .ctl[dsf_bts] = (1 << 6) | (1 << 7),
};
static const struct ds_configuration ds_cfg_core2_atom = {
        .name = "Core 2/Atom",
        .ctl[dsf_bts] = (1 << 6) | (1 << 7),
        .ctl[dsf_bts_kernel] = (1 << 9),
        .ctl[dsf_bts_user] = (1 << 10),
};

static void
ds_configure(const struct ds_configuration *cfg,
             struct cpuinfo_x86 *cpu)
{
        unsigned long nr_pebs_fields = 0;

        printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);

#ifdef __i386__
        nr_pebs_fields = 10;
#else
        nr_pebs_fields = 18;
#endif

        memset(&ds_cfg, 0, sizeof(ds_cfg));
        ds_cfg = *cfg;

        ds_cfg.sizeof_ptr_field =
                (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);

        ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3;
        ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;

        if (!cpu_has(cpu, X86_FEATURE_BTS)) {
                ds_cfg.sizeof_rec[ds_bts] = 0;
                printk(KERN_INFO "[ds] bts not available\n");
        }
        if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
                ds_cfg.sizeof_rec[ds_pebs] = 0;
                printk(KERN_INFO "[ds] pebs not available\n");
        }

        if (ds_cfg.sizeof_rec[ds_bts]) {
                int error;

                error = ds_selftest_bts();
                if (error) {
                        WARN(1, "[ds] selftest failed. disabling bts.\n");
                        ds_cfg.sizeof_rec[ds_bts] = 0;
                }
        }

        if (ds_cfg.sizeof_rec[ds_pebs]) {
                int error;

                error = ds_selftest_pebs();
                if (error) {
                        WARN(1, "[ds] selftest failed. disabling pebs.\n");
                        ds_cfg.sizeof_rec[ds_pebs] = 0;
                }
        }

        printk(KERN_INFO "[ds] sizes: address: %u bit, ",
               8 * ds_cfg.sizeof_ptr_field);
        printk("bts/pebs record: %u/%u bytes\n",
               ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);

        WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_ptr_field));
}

void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
{
        switch (c->x86) {
        case 0x6:
                switch (c->x86_model) {
                case 0x9:
                case 0xd: /* Pentium M */
                        ds_configure(&ds_cfg_pentium_m, c);
                        break;
                case 0xf:
                case 0x17: /* Core2 */
                case 0x1c: /* Atom */
                        ds_configure(&ds_cfg_core2_atom, c);
                        break;
                case 0x1a: /* Core i7 */
                default:
                        /* Sorry, don't know about them. */
                        break;
                }
                break;
        case 0xf:
                switch (c->x86_model) {
                case 0x0:
                case 0x1:
                case 0x2: /* Netburst */
                        ds_configure(&ds_cfg_netburst, c);
                        break;
                default:
                        /* Sorry, don't know about them. */
                        break;
                }
                break;
        default:
                /* Sorry, don't know about them. */
                break;
        }
}

static inline void ds_take_timestamp(struct ds_context *context,
                                     enum bts_qualifier qualifier,
                                     struct task_struct *task)
{
        struct bts_tracer *tracer = context->bts_master;
        struct bts_struct ts;

        /* Prevent compilers from reading the tracer pointer twice. */
        barrier();

        if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
                return;

        memset(&ts, 0, sizeof(ts));
        ts.qualifier = qualifier;
        ts.variant.timestamp.jiffies = jiffies_64;
        ts.variant.timestamp.pid = task->pid;

        bts_write(tracer, &ts);
}

/*
 * Change the DS configuration from tracing prev to tracing next.
 */
void ds_switch_to(struct task_struct *prev, struct task_struct *next)
{
        struct ds_context *prev_ctx = prev->thread.ds_ctx;
        struct ds_context *next_ctx = next->thread.ds_ctx;
        unsigned long debugctlmsr = next->thread.debugctlmsr;

        /* Make sure all data is read before we start. */
        barrier();

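        /*
         * If the outgoing task was traced, disable tracing before
         * stamping and leaving its context, so no further records
         * land in its buffer; the incoming task's DEBUGCTL setting
         * is restored at the end.
         */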
        if (prev_ctx) {
                update_debugctlmsr(0);

                ds_take_timestamp(prev_ctx, bts_task_departs, prev);
        }

        if (next_ctx) {
                ds_take_timestamp(next_ctx, bts_task_arrives, next);

                wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
        }

        update_debugctlmsr(debugctlmsr);
}

void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
{
        /* A newly forked thread never inherits the tracing context. */
        clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
        tsk->thread.ds_ctx = NULL;
}

void ds_exit_thread(struct task_struct *tsk)
{
}