/*
 * Debug Store support
 *
 * This provides a low-level interface to the hardware's Debug Store
 * feature that is used for branch trace store (BTS) and
 * precise-event based sampling (PEBS).
 *
 * It manages:
 * - DS and BTS hardware configuration
 * - buffer overflow handling (to be done)
 * - buffer access
 *
 * It does not do:
 * - security checking (is the caller allowed to trace the task)
 * - buffer allocation (memory accounting)
 *
 *
 * Copyright (C) 2007-2009 Intel Corporation.
 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
 */

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/trace_clock.h>

#include <asm/ds.h>

#include "ds_selftest.h"
/*
 * The configuration for a particular DS hardware implementation:
 */
struct ds_configuration {
	/* The name of the configuration: */
	const char *name;

	/* The size of pointer-typed fields in DS, BTS, and PEBS: */
	unsigned char sizeof_ptr_field;

	/* The size of a BTS/PEBS record in bytes: */
	unsigned char sizeof_rec[2];

	/* Control bit-masks indexed by enum ds_feature: */
	unsigned long ctl[dsf_ctl_max];
};
static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);

#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())

/* Maximal size of a DS configuration: */
#define MAX_SIZEOF_DS	(12 * 8)

/* Maximal size of a BTS record: */
#define MAX_SIZEOF_BTS	(3 * 8)

/* BTS and PEBS buffer alignment: */
#define DS_ALIGNMENT	(1 << 3)

/* Mask of control bits in the DS MSR register: */
#define BTS_CONTROL				\
	( ds_cfg.ctl[dsf_bts]			| \
	  ds_cfg.ctl[dsf_bts_kernel]		| \
	  ds_cfg.ctl[dsf_bts_user]		| \
	  ds_cfg.ctl[dsf_bts_overflow] )
/*
 * A BTS or PEBS tracer.
 *
 * This holds the configuration of the tracer and serves as a handle
 * to identify tracers.
 */
struct ds_tracer {
	/* The DS context (partially) owned by this tracer. */
	struct ds_context *context;
	/* The buffer provided on ds_request() and its size in bytes. */
	void *buffer;
	size_t size;
};

struct bts_tracer {
	/* The common DS part: */
	struct ds_tracer ds;

	/* The trace including the DS configuration: */
	struct bts_trace trace;

	/* Buffer overflow notification function: */
	bts_ovfl_callback_t ovfl;

	/* Active flags affecting trace collection. */
	unsigned int flags;
};

struct pebs_tracer {
	/* The common DS part: */
	struct ds_tracer ds;

	/* The trace including the DS configuration: */
	struct pebs_trace trace;

	/* Buffer overflow notification function: */
	pebs_ovfl_callback_t ovfl;
};
/*
 * Debug Store (DS) save area configuration (see Intel64 and IA32
 * Architectures Software Developer's Manual, section 18.5)
 *
 * The DS configuration consists of the following fields; different
 * architectures vary in the size of those fields.
 *
 * - double-word aligned base linear address of the BTS buffer
 * - write pointer into the BTS buffer
 * - end linear address of the BTS buffer (one byte beyond the end of
 *   the buffer)
 * - interrupt pointer into BTS buffer
 *   (interrupt occurs when write pointer passes interrupt pointer)
 * - double-word aligned base linear address of the PEBS buffer
 * - write pointer into the PEBS buffer
 * - end linear address of the PEBS buffer (one byte beyond the end of
 *   the buffer)
 * - interrupt pointer into PEBS buffer
 *   (interrupt occurs when write pointer passes interrupt pointer)
 * - value to which counter is reset following counter overflow
 *
 * Later architectures use 64bit pointers throughout, whereas earlier
 * architectures use 32bit pointers in 32bit mode.
 *
 *
 * We compute the base address for the first 8 fields based on:
 * - the field size stored in the DS configuration
 * - the relative field position
 * - an offset giving the start of the respective region
 *
 * This offset is further used to index various arrays holding
 * information for BTS and PEBS at the respective index.
 *
 * On later 32bit processors, we only access the lower 32bit of the
 * 64bit pointer fields. The upper halves will be zeroed out.
 */
enum ds_field {
	ds_buffer_base = 0,
	ds_index,
	ds_absolute_maximum,
	ds_interrupt_threshold,
};

enum ds_qualifier {
	ds_bts = 0,
	ds_pebs
};
static inline unsigned long
ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
{
	base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
	return *(unsigned long *)base;
}

static inline void
ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
       unsigned long value)
{
	base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
	(*(unsigned long *)base) = value;
}
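
/*
 * Illustrative sketch (not part of the driver): the offset arithmetic
 * shared by ds_get() and ds_set() above. Each qualifier owns four
 * pointer-sized fields; BTS comes first, PEBS second.
 */
static inline size_t ds_example_field_offset(enum ds_qualifier qual,
					     enum ds_field field)
{
	return ds_cfg.sizeof_ptr_field * (field + (4 * qual));
}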
/*
 * Locking is done only for allocating BTS or PEBS resources.
 */
static DEFINE_SPINLOCK(ds_lock);

/*
 * We either support (system-wide) per-cpu or per-thread allocation.
 * We distinguish the two based on the task_struct pointer, where a
 * NULL pointer indicates per-cpu allocation for the current cpu.
 *
 * Allocations are use-counted. As soon as resources are allocated,
 * further allocations must be of the same type (per-cpu or
 * per-thread). We model this by counting allocations (i.e. the number
 * of tracers of a certain type) for one type negatively:
 *   =0  no tracers
 *   >0  number of per-thread tracers
 *   <0  number of per-cpu tracers
 *
 * The tracers count essentially gives the number of DS contexts for a
 * certain type of allocation.
 */
static atomic_t tracers = ATOMIC_INIT(0);

static inline int get_tracer(struct task_struct *task)
{
	int error;

	spin_lock_irq(&ds_lock);

	if (task) {
		error = -EPERM;
		if (atomic_read(&tracers) < 0)
			goto out;
		atomic_inc(&tracers);
	} else {
		error = -EPERM;
		if (atomic_read(&tracers) > 0)
			goto out;
		atomic_dec(&tracers);
	}

	error = 0;
out:
	spin_unlock_irq(&ds_lock);
	return error;
}

static inline void put_tracer(struct task_struct *task)
{
	if (task)
		atomic_dec(&tracers);
	else
		atomic_inc(&tracers);
}
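
/*
 * Illustrative sketch (not part of the driver): the type check
 * get_tracer() above performs under ds_lock, written as a predicate.
 * A positive count means per-thread tracers exist, a negative count
 * means per-cpu tracers exist, and zero allows either type.
 */
static inline int ds_example_type_allowed(struct task_struct *task)
{
	int count = atomic_read(&tracers);

	return task ? (count >= 0) : (count <= 0);
}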
/*
 * The DS context is either attached to a thread or to a cpu:
 * - in the former case, the thread_struct contains a pointer to the
 *   attached context.
 * - in the latter case, we use a static array of per-cpu context
 *   pointers.
 *
 * Contexts are use-counted. They are allocated on first access and
 * deallocated when the last user puts the context.
 */
struct ds_context {
	/* The DS configuration; goes into MSR_IA32_DS_AREA: */
	unsigned char ds[MAX_SIZEOF_DS];

	/* The owner of the BTS and PEBS configuration, respectively: */
	struct bts_tracer *bts_master;
	struct pebs_tracer *pebs_master;

	/* Use count: */
	unsigned long count;

	/* Pointer to the context pointer field: */
	struct ds_context **this;

	/* The traced task; NULL for cpu tracing: */
	struct task_struct *task;

	/* The traced cpu; only valid if task is NULL: */
	int cpu;
};

static DEFINE_PER_CPU(struct ds_context *, cpu_context);

static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
{
	struct ds_context **p_context =
		(task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
	struct ds_context *context = NULL;
	struct ds_context *new_context = NULL;

	/* Chances are small that we already have a context. */
	new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
	if (!new_context)
		return NULL;

	spin_lock_irq(&ds_lock);

	context = *p_context;
	if (likely(!context)) {
		context = new_context;

		context->this = p_context;
		context->task = task;
		context->cpu = cpu;
		context->count = 0;

		*p_context = context;
	}

	context->count++;

	spin_unlock_irq(&ds_lock);

	if (context != new_context)
		kfree(new_context);

	return context;
}

static void ds_put_context(struct ds_context *context)
{
	struct task_struct *task;
	unsigned long irq;

	if (!context)
		return;

	spin_lock_irqsave(&ds_lock, irq);

	if (--context->count) {
		spin_unlock_irqrestore(&ds_lock, irq);
		return;
	}

	*(context->this) = NULL;

	task = context->task;

	if (task)
		clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);

	/*
	 * We leave the (now dangling) pointer to the DS configuration in
	 * the DS_AREA msr. This is as good or as bad as replacing it with
	 * NULL - the hardware would crash if we enabled tracing.
	 *
	 * This saves us some problems with having to write an msr on a
	 * different cpu while preventing others from doing the same for the
	 * next context for that same cpu.
	 */

	spin_unlock_irqrestore(&ds_lock, irq);

	/* The context might still be in use for context switching. */
	if (task && (task != current))
		wait_task_context_switch(task);

	kfree(context);
}
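
/*
 * Illustrative sketch (not part of the driver): the context life
 * cycle. Contexts are allocated on first get and freed on the last
 * put; every successful ds_get_context() must be paired with a
 * ds_put_context().
 */
static inline int ds_example_with_context(struct task_struct *task, int cpu)
{
	struct ds_context *context = ds_get_context(task, cpu);

	if (!context)
		return -ENOMEM;

	/* ... configure and use context->ds ... */

	ds_put_context(context);
	return 0;
}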
static void ds_install_ds_area(struct ds_context *context)
{
	unsigned long ds;

	ds = (unsigned long)context->ds;

	/*
	 * There is a race between the bts master and the pebs master.
	 *
	 * The thread/cpu access is synchronized via get/put_cpu() for
	 * task tracing and via wrmsr_on_cpu for cpu tracing.
	 *
	 * If bts and pebs are collected for the same task or same cpu,
	 * the same configuration is written twice.
	 */
	if (context->task) {
		get_cpu();
		if (context->task == current)
			wrmsrl(MSR_IA32_DS_AREA, ds);
		set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
		put_cpu();
	} else
		wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
			     (u32)((u64)ds), (u32)((u64)ds >> 32));
}
/*
 * Call the tracer's callback on a buffer overflow.
 *
 * context: the ds context
 * qual:    the buffer type
 */
static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
{
	switch (qual) {
	case ds_bts:
		if (context->bts_master &&
		    context->bts_master->ovfl)
			context->bts_master->ovfl(context->bts_master);
		break;
	case ds_pebs:
		if (context->pebs_master &&
		    context->pebs_master->ovfl)
			context->pebs_master->ovfl(context->pebs_master);
		break;
	}
}
/*
 * Write raw data into the BTS or PEBS buffer.
 *
 * The remainder of any partially written record is zeroed out.
 *
 * context: the DS context
 * qual:    the buffer type
 * record:  the data to write
 * size:    the size of the data
 */
static int ds_write(struct ds_context *context, enum ds_qualifier qual,
		    const void *record, size_t size)
{
	int bytes_written = 0;

	if (!record)
		return -EINVAL;

	while (size) {
		unsigned long base, index, end, write_end, int_th;
		unsigned long write_size, adj_write_size;

		/*
		 * Write as much as possible without producing an
		 * overflow interrupt.
		 *
		 * Interrupt_threshold must either be
		 * - bigger than absolute_maximum or
		 * - point to a record between buffer_base and absolute_maximum
		 *
		 * Index points to a valid record.
		 */
		base   = ds_get(context->ds, qual, ds_buffer_base);
		index  = ds_get(context->ds, qual, ds_index);
		end    = ds_get(context->ds, qual, ds_absolute_maximum);
		int_th = ds_get(context->ds, qual, ds_interrupt_threshold);

		write_end = min(end, int_th);

		/*
		 * If we are already beyond the interrupt threshold,
		 * we fill the entire buffer.
		 */
		if (write_end <= index)
			write_end = end;

		if (write_end <= index)
			break;

		write_size = min((unsigned long) size, write_end - index);
		memcpy((void *)index, record, write_size);

		record = (const char *)record + write_size;
		size -= write_size;
		bytes_written += write_size;

		/* Round up to the next record boundary. */
		adj_write_size = DIV_ROUND_UP(write_size,
					      ds_cfg.sizeof_rec[qual]);
		adj_write_size *= ds_cfg.sizeof_rec[qual];

		/* Zero out trailing bytes. */
		memset((char *)index + write_size, 0,
		       adj_write_size - write_size);
		index += adj_write_size;

		if (index >= end)
			index = base;
		ds_set(context->ds, qual, ds_index, index);

		if (index >= int_th)
			ds_overflow(context, qual);
	}

	return bytes_written;
}
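
/*
 * Illustrative sketch (not part of the driver): the record rounding
 * used by ds_write() above. A write advances the index by whole
 * records; the tail of a partially written record is zeroed.
 */
static inline unsigned long ds_example_advance(unsigned long write_size,
					       enum ds_qualifier qual)
{
	return DIV_ROUND_UP(write_size, ds_cfg.sizeof_rec[qual]) *
		ds_cfg.sizeof_rec[qual];
}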
/*
 * Branch Trace Store (BTS) uses the following format. Different
 * architectures vary in the size of those fields.
 * - source linear address
 * - destination linear address
 * - flags
 *
 * Later architectures use 64bit pointers throughout, whereas earlier
 * architectures use 32bit pointers in 32bit mode.
 *
 * We compute the base address for the fields based on:
 * - the field size stored in the DS configuration
 * - the relative field position
 *
 * In order to store additional information in the BTS buffer, we use
 * a special source address to indicate that the record requires
 * special interpretation.
 *
 * On Netburst, a bit in the flags field indicated whether the branch
 * was predicted; we ignore that bit.
 *
 * We use two levels of abstraction:
 * - the raw data level defined here
 * - an arch-independent level defined in ds.h
 */
enum bts_field {
	bts_from,
	bts_to,
	bts_flags,

	bts_qual	= bts_from,
	bts_clock	= bts_to,
	bts_pid		= bts_flags,

	bts_qual_mask	= (bts_qual_max - 1),
	bts_escape	= ((unsigned long)-1 & ~bts_qual_mask)
};

static inline unsigned long bts_get(const char *base, enum bts_field field)
{
	base += (ds_cfg.sizeof_ptr_field * field);
	return *(unsigned long *)base;
}

static inline void bts_set(char *base, enum bts_field field, unsigned long val)
{
	base += (ds_cfg.sizeof_ptr_field * field);
	(*(unsigned long *)base) = val;
}
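
/*
 * Illustrative sketch (not part of the driver): how bts_read() below
 * distinguishes an extended (escape) record from a plain branch
 * record.
 */
static inline int bts_example_is_escape(const char *at)
{
	return (bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape;
}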
/*
 * The raw BTS data is architecture dependent.
 *
 * For higher-level users, we give an arch-independent view.
 * - ds.h defines struct bts_struct
 * - bts_read translates one raw bts record into a bts_struct
 * - bts_write translates one bts_struct into the raw format and
 *   writes it into the top of the parameter tracer's buffer.
 *
 * return: bytes read/written on success; -Eerrno, otherwise
 */
static int
bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
{
	if (!tracer)
		return -EINVAL;

	if (at < tracer->trace.ds.begin)
		return -EINVAL;

	if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
		return -EINVAL;

	memset(out, 0, sizeof(*out));
	if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
		out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
		out->variant.event.clock = bts_get(at, bts_clock);
		out->variant.event.pid = bts_get(at, bts_pid);
	} else {
		out->qualifier = bts_branch;
		out->variant.lbr.from = bts_get(at, bts_from);
		out->variant.lbr.to = bts_get(at, bts_to);

		if (!out->variant.lbr.from && !out->variant.lbr.to)
			out->qualifier = bts_invalid;
	}

	return ds_cfg.sizeof_rec[ds_bts];
}

static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
{
	unsigned char raw[MAX_SIZEOF_BTS];

	if (!tracer)
		return -EINVAL;

	if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
		return -EOVERFLOW;

	switch (in->qualifier) {
	case bts_invalid:
		bts_set(raw, bts_from, 0);
		bts_set(raw, bts_to, 0);
		bts_set(raw, bts_flags, 0);
		break;
	case bts_branch:
		bts_set(raw, bts_from, in->variant.lbr.from);
		bts_set(raw, bts_to, in->variant.lbr.to);
		bts_set(raw, bts_flags, 0);
		break;
	case bts_task_arrives:
	case bts_task_departs:
		bts_set(raw, bts_qual, (bts_escape | in->qualifier));
		bts_set(raw, bts_clock, in->variant.event.clock);
		bts_set(raw, bts_pid, in->variant.event.pid);
		break;
	default:
		return -EINVAL;
	}

	return ds_write(tracer->ds.context, ds_bts, raw,
			ds_cfg.sizeof_rec[ds_bts]);
}
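
/*
 * Illustrative sketch (not part of the driver): emitting a synthetic
 * event record through bts_write(), much like ds_take_timestamp()
 * does further down for context switches.
 */
static inline int bts_example_write_event(struct bts_tracer *tracer,
					  u64 clock, pid_t pid)
{
	struct bts_struct ts;

	memset(&ts, 0, sizeof(ts));
	ts.qualifier = bts_task_arrives;
	ts.variant.event.clock = clock;
	ts.variant.event.pid = pid;

	return bts_write(tracer, &ts);
}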
static void ds_write_config(struct ds_context *context,
			    struct ds_trace *cfg, enum ds_qualifier qual)
{
	unsigned char *ds = context->ds;

	ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
	ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
	ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
	ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
}

static void ds_read_config(struct ds_context *context,
			   struct ds_trace *cfg, enum ds_qualifier qual)
{
	unsigned char *ds = context->ds;

	cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
	cfg->top = (void *)ds_get(ds, qual, ds_index);
	cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
	cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
}
static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
			     void *base, size_t size, size_t ith,
			     unsigned int flags)
{
	unsigned long buffer, adj;

	/*
	 * Adjust the buffer address and size to meet alignment
	 * constraints:
	 * - buffer is double-word aligned
	 * - size is multiple of record size
	 *
	 * We checked the size at the very beginning; we have enough
	 * space to do the adjustment.
	 */
	buffer = (unsigned long)base;

	adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
	buffer += adj;
	size -= adj;

	trace->n = size / ds_cfg.sizeof_rec[qual];
	trace->size = ds_cfg.sizeof_rec[qual];

	size = (trace->n * trace->size);

	trace->begin = (void *)buffer;
	trace->top = trace->begin;
	trace->end = (void *)(buffer + size);
	/*
	 * The value for 'no threshold' is -1, which will set the
	 * threshold outside of the buffer, just like we want it.
	 */
	ith *= ds_cfg.sizeof_rec[qual];
	trace->ith = (void *)(buffer + size - ith);

	trace->flags = flags;
}
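
/*
 * Illustrative sketch (not part of the driver): the usable record
 * count ds_init_ds_trace() computes for a hypothetical buffer. E.g.
 * base 0x1004 with size 100 and a 24-byte record yields adj = 4,
 * 96 usable bytes, and n = 4 records.
 */
static inline size_t ds_example_nr_records(unsigned long base, size_t size,
					   enum ds_qualifier qual)
{
	unsigned long adj = ALIGN(base, DS_ALIGNMENT) - base;

	return (size - adj) / ds_cfg.sizeof_rec[qual];
}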
static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
		      enum ds_qualifier qual, struct task_struct *task,
		      int cpu, void *base, size_t size, size_t th)
{
	struct ds_context *context;
	int error;

	error = -EOPNOTSUPP;
	if (!ds_cfg.sizeof_rec[qual])
		goto out;

	error = -EINVAL;
	if (!base)
		goto out;

	/* We need space for alignment adjustments in ds_init_ds_trace(). */
	error = -EINVAL;
	if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
		goto out;

	if (th != (size_t)-1) {
		th *= ds_cfg.sizeof_rec[qual];

		error = -EINVAL;
		if (size <= th)
			goto out;
	}

	tracer->buffer = base;
	tracer->size = size;

	error = -ENOMEM;
	context = ds_get_context(task, cpu);
	if (!context)
		goto out;
	tracer->context = context;

	/*
	 * Defer any tracer-specific initialization work for the context until
	 * context ownership has been clarified.
	 */

	error = 0;
 out:
	return error;
}
static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
					 void *base, size_t size,
					 bts_ovfl_callback_t ovfl, size_t th,
					 unsigned int flags)
{
	struct bts_tracer *tracer;
	int error;

	/* Buffer overflow notification is not yet implemented. */
	error = -EOPNOTSUPP;
	if (ovfl)
		goto out;

	error = get_tracer(task);
	if (error < 0)
		goto out;

	error = -ENOMEM;
	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
	if (!tracer)
		goto out_put_tracer;
	tracer->ovfl = ovfl;

	/* Do some more error checking and acquire a tracing context. */
	error = ds_request(&tracer->ds, &tracer->trace.ds,
			   ds_bts, task, cpu, base, size, th);
	if (error < 0)
		goto out_tracer;

	/* Claim the bts part of the tracing context we acquired above. */
	spin_lock_irq(&ds_lock);

	error = -EPERM;
	if (tracer->ds.context->bts_master)
		goto out_unlock;
	tracer->ds.context->bts_master = tracer;

	spin_unlock_irq(&ds_lock);

	/*
	 * Now that we own the bts part of the context, let's complete the
	 * initialization for that part.
	 */
	ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
	ds_install_ds_area(tracer->ds.context);

	tracer->trace.read = bts_read;
	tracer->trace.write = bts_write;

	/* Start tracing. */
	ds_resume_bts(tracer);

	return tracer;

 out_unlock:
	spin_unlock_irq(&ds_lock);
	ds_put_context(tracer->ds.context);
 out_tracer:
	kfree(tracer);
 out_put_tracer:
	put_tracer(task);
 out:
	return ERR_PTR(error);
}
struct bts_tracer *ds_request_bts_task(struct task_struct *task,
				       void *base, size_t size,
				       bts_ovfl_callback_t ovfl,
				       size_t th, unsigned int flags)
{
	return ds_request_bts(task, 0, base, size, ovfl, th, flags);
}

struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
				      bts_ovfl_callback_t ovfl,
				      size_t th, unsigned int flags)
{
	return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
}
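
/*
 * Illustrative sketch (not part of the driver): a minimal per-cpu BTS
 * session using the request interface above. The buffer is supplied
 * and owned by the caller; all names here are hypothetical.
 */
static inline void ds_example_bts_session(int cpu, void *buf, size_t size)
{
	struct bts_tracer *tracer;

	/* NULL ovfl: overflow notification is not yet implemented. */
	tracer = ds_request_bts_cpu(cpu, buf, size, NULL,
				    (size_t)-1, BTS_KERNEL | BTS_USER);
	if (IS_ERR(tracer))
		return;

	/* ... tracing runs; records accumulate in buf ... */

	ds_release_bts(tracer);
}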
static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
					   void *base, size_t size,
					   pebs_ovfl_callback_t ovfl, size_t th,
					   unsigned int flags)
{
	struct pebs_tracer *tracer;
	int error;

	/* Buffer overflow notification is not yet implemented. */
	error = -EOPNOTSUPP;
	if (ovfl)
		goto out;

	error = get_tracer(task);
	if (error < 0)
		goto out;

	error = -ENOMEM;
	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
	if (!tracer)
		goto out_put_tracer;
	tracer->ovfl = ovfl;

	/* Do some more error checking and acquire a tracing context. */
	error = ds_request(&tracer->ds, &tracer->trace.ds,
			   ds_pebs, task, cpu, base, size, th);
	if (error < 0)
		goto out_tracer;

	/* Claim the pebs part of the tracing context we acquired above. */
	spin_lock_irq(&ds_lock);

	error = -EPERM;
	if (tracer->ds.context->pebs_master)
		goto out_unlock;
	tracer->ds.context->pebs_master = tracer;

	spin_unlock_irq(&ds_lock);

	/*
	 * Now that we own the pebs part of the context, let's complete the
	 * initialization for that part.
	 */
	ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
	ds_install_ds_area(tracer->ds.context);

	/* Start tracing. */
	ds_resume_pebs(tracer);

	return tracer;

 out_unlock:
	spin_unlock_irq(&ds_lock);
	ds_put_context(tracer->ds.context);
 out_tracer:
	kfree(tracer);
 out_put_tracer:
	put_tracer(task);
 out:
	return ERR_PTR(error);
}

struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
					 void *base, size_t size,
					 pebs_ovfl_callback_t ovfl,
					 size_t th, unsigned int flags)
{
	return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
}

struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
					pebs_ovfl_callback_t ovfl,
					size_t th, unsigned int flags)
{
	return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
}
static void ds_free_bts(struct bts_tracer *tracer)
{
	struct task_struct *task;

	task = tracer->ds.context->task;

	WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
	tracer->ds.context->bts_master = NULL;

	/* Make sure tracing stopped and the tracer is not in use. */
	if (task && (task != current))
		wait_task_context_switch(task);

	ds_put_context(tracer->ds.context);
	put_tracer(task);

	kfree(tracer);
}

void ds_release_bts(struct bts_tracer *tracer)
{
	might_sleep();

	if (!tracer)
		return;

	ds_suspend_bts(tracer);
	ds_free_bts(tracer);
}

int ds_release_bts_noirq(struct bts_tracer *tracer)
{
	struct task_struct *task;
	unsigned long irq;
	int error;

	if (!tracer)
		return 0;

	task = tracer->ds.context->task;

	local_irq_save(irq);

	error = -EPERM;
	if (!task &&
	    (tracer->ds.context->cpu != smp_processor_id()))
		goto out;

	error = -EPERM;
	if (task && (task != current))
		goto out;

	ds_suspend_bts_noirq(tracer);
	ds_free_bts(tracer);

	error = 0;
 out:
	local_irq_restore(irq);
	return error;
}
static void update_task_debugctlmsr(struct task_struct *task,
				    unsigned long debugctlmsr)
{
	task->thread.debugctlmsr = debugctlmsr;

	get_cpu();
	if (task == current)
		update_debugctlmsr(debugctlmsr);

	if (task->thread.debugctlmsr)
		set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
	else
		clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
	put_cpu();
}
void ds_suspend_bts(struct bts_tracer *tracer)
{
	struct task_struct *task;
	unsigned long debugctlmsr;
	int cpu;

	if (!tracer)
		return;

	tracer->flags = 0;

	task = tracer->ds.context->task;
	cpu = tracer->ds.context->cpu;

	WARN_ON(!task && irqs_disabled());

	debugctlmsr = (task ?
		       task->thread.debugctlmsr :
		       get_debugctlmsr_on_cpu(cpu));
	debugctlmsr &= ~BTS_CONTROL;

	if (task)
		update_task_debugctlmsr(task, debugctlmsr);
	else
		update_debugctlmsr_on_cpu(cpu, debugctlmsr);
}

int ds_suspend_bts_noirq(struct bts_tracer *tracer)
{
	struct task_struct *task;
	unsigned long debugctlmsr, irq;
	int cpu, error = 0;

	if (!tracer)
		return 0;

	tracer->flags = 0;

	task = tracer->ds.context->task;
	cpu = tracer->ds.context->cpu;

	local_irq_save(irq);

	error = -EPERM;
	if (!task && (cpu != smp_processor_id()))
		goto out;

	debugctlmsr = (task ?
		       task->thread.debugctlmsr :
		       get_debugctlmsr());
	debugctlmsr &= ~BTS_CONTROL;

	if (task)
		update_task_debugctlmsr(task, debugctlmsr);
	else
		update_debugctlmsr(debugctlmsr);

	error = 0;
 out:
	local_irq_restore(irq);
	return error;
}
static unsigned long ds_bts_control(struct bts_tracer *tracer)
{
	unsigned long control;

	control = ds_cfg.ctl[dsf_bts];
	if (!(tracer->trace.ds.flags & BTS_KERNEL))
		control |= ds_cfg.ctl[dsf_bts_kernel];
	if (!(tracer->trace.ds.flags & BTS_USER))
		control |= ds_cfg.ctl[dsf_bts_user];

	return control;
}
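
/*
 * Illustrative sketch (not part of the driver): the control bits for a
 * hypothetical tracer that traces user-space branches only. Leaving
 * BTS_KERNEL out of trace.ds.flags makes ds_bts_control() above OR in
 * the dsf_bts_kernel bit, which suppresses ring-0 branch recording.
 */
static inline unsigned long ds_example_user_only_control(void)
{
	return ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel];
}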
void ds_resume_bts(struct bts_tracer *tracer)
{
	struct task_struct *task;
	unsigned long debugctlmsr;
	int cpu;

	if (!tracer)
		return;

	tracer->flags = tracer->trace.ds.flags;

	task = tracer->ds.context->task;
	cpu = tracer->ds.context->cpu;

	WARN_ON(!task && irqs_disabled());

	debugctlmsr = (task ?
		       task->thread.debugctlmsr :
		       get_debugctlmsr_on_cpu(cpu));
	debugctlmsr |= ds_bts_control(tracer);

	if (task)
		update_task_debugctlmsr(task, debugctlmsr);
	else
		update_debugctlmsr_on_cpu(cpu, debugctlmsr);
}

int ds_resume_bts_noirq(struct bts_tracer *tracer)
{
	struct task_struct *task;
	unsigned long debugctlmsr, irq;
	int cpu, error = 0;

	if (!tracer)
		return 0;

	tracer->flags = tracer->trace.ds.flags;

	task = tracer->ds.context->task;
	cpu = tracer->ds.context->cpu;

	local_irq_save(irq);

	error = -EPERM;
	if (!task && (cpu != smp_processor_id()))
		goto out;

	debugctlmsr = (task ?
		       task->thread.debugctlmsr :
		       get_debugctlmsr());
	debugctlmsr |= ds_bts_control(tracer);

	if (task)
		update_task_debugctlmsr(task, debugctlmsr);
	else
		update_debugctlmsr(debugctlmsr);

	error = 0;
 out:
	local_irq_restore(irq);
	return error;
}
static void ds_free_pebs(struct pebs_tracer *tracer)
{
	struct task_struct *task;

	task = tracer->ds.context->task;

	WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
	tracer->ds.context->pebs_master = NULL;

	ds_put_context(tracer->ds.context);
	put_tracer(task);

	kfree(tracer);
}

void ds_release_pebs(struct pebs_tracer *tracer)
{
	might_sleep();

	if (!tracer)
		return;

	ds_suspend_pebs(tracer);
	ds_free_pebs(tracer);
}

int ds_release_pebs_noirq(struct pebs_tracer *tracer)
{
	struct task_struct *task;
	unsigned long irq;
	int error;

	if (!tracer)
		return 0;

	task = tracer->ds.context->task;

	local_irq_save(irq);

	error = -EPERM;
	if (!task &&
	    (tracer->ds.context->cpu != smp_processor_id()))
		goto out;

	error = -EPERM;
	if (task && (task != current))
		goto out;

	ds_suspend_pebs_noirq(tracer);
	ds_free_pebs(tracer);

	error = 0;
 out:
	local_irq_restore(irq);
	return error;
}

void ds_suspend_pebs(struct pebs_tracer *tracer)
{
}

int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
{
	return 0;
}

void ds_resume_pebs(struct pebs_tracer *tracer)
{
}

int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
{
	return 0;
}
const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
{
	if (!tracer)
		return NULL;

	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);

	return &tracer->trace;
}

const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
{
	if (!tracer)
		return NULL;

	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
	tracer->trace.reset_value =
		*(u64 *)(tracer->ds.context->ds +
			 (ds_cfg.sizeof_ptr_field * 8));

	return &tracer->trace;
}
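
/*
 * Illustrative sketch (not part of the driver): draining the collected
 * trace via the arch-independent view returned by ds_read_bts().
 */
static inline void ds_example_dump_bts(struct bts_tracer *tracer)
{
	const struct bts_trace *trace = ds_read_bts(tracer);
	const void *at;

	if (!trace)
		return;

	/* Records between begin and the current write pointer (top). */
	for (at = trace->ds.begin; at < trace->ds.top;
	     at = (const char *)at + trace->ds.size) {
		struct bts_struct bts;

		if (trace->read(tracer, at, &bts) <= 0)
			break;

		/* ... consume bts (branch or event record) ... */
	}
}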
int ds_reset_bts(struct bts_tracer *tracer)
{
	if (!tracer)
		return -EINVAL;

	tracer->trace.ds.top = tracer->trace.ds.begin;

	ds_set(tracer->ds.context->ds, ds_bts, ds_index,
	       (unsigned long)tracer->trace.ds.top);

	return 0;
}

int ds_reset_pebs(struct pebs_tracer *tracer)
{
	if (!tracer)
		return -EINVAL;

	tracer->trace.ds.top = tracer->trace.ds.begin;

	/* Reset the PEBS index, not the BTS one. */
	ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
	       (unsigned long)tracer->trace.ds.top);

	return 0;
}

int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
{
	if (!tracer)
		return -EINVAL;

	*(u64 *)(tracer->ds.context->ds +
		 (ds_cfg.sizeof_ptr_field * 8)) = value;

	return 0;
}
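
/*
 * Illustrative sketch (not part of the driver): the PEBS counter-reset
 * value sits right behind the eight buffer-management pointers in the
 * DS area; ds_read_pebs() and ds_set_pebs_reset() above both locate it
 * this way.
 */
static inline u64 *pebs_example_reset_field(struct ds_context *context)
{
	return (u64 *)(context->ds + (ds_cfg.sizeof_ptr_field * 8));
}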
static const struct ds_configuration ds_cfg_netburst = {
	.name = "Netburst",
	.ctl[dsf_bts]		= (1 << 2) | (1 << 3),
	.ctl[dsf_bts_kernel]	= (1 << 5),
	.ctl[dsf_bts_user]	= (1 << 6),
};
static const struct ds_configuration ds_cfg_pentium_m = {
	.name = "Pentium M",
	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
};
static const struct ds_configuration ds_cfg_core2_atom = {
	.name = "Core 2/Atom",
	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
	.ctl[dsf_bts_kernel]	= (1 << 9),
	.ctl[dsf_bts_user]	= (1 << 10),
};
static void
ds_configure(const struct ds_configuration *cfg,
	     struct cpuinfo_x86 *cpu)
{
	unsigned long nr_pebs_fields = 0;

	printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);

#ifdef __i386__
	nr_pebs_fields = 10;
#else
	nr_pebs_fields = 18;
#endif

	memset(&ds_cfg, 0, sizeof(ds_cfg));
	ds_cfg = *cfg;

	ds_cfg.sizeof_ptr_field =
		(cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);

	ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3;
	ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;

	if (!cpu_has(cpu, X86_FEATURE_BTS)) {
		ds_cfg.sizeof_rec[ds_bts] = 0;
		printk(KERN_INFO "[ds] bts not available\n");
	}
	if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
		ds_cfg.sizeof_rec[ds_pebs] = 0;
		printk(KERN_INFO "[ds] pebs not available\n");
	}

	printk(KERN_INFO "[ds] sizes: address: %u bit, ",
	       8 * ds_cfg.sizeof_ptr_field);
	printk("bts/pebs record: %u/%u bytes\n",
	       ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);

	WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_ptr_field));
}
void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
{
	switch (c->x86) {
	case 0x6:
		switch (c->x86_model) {
		case 0x9:
		case 0xd: /* Pentium M */
			ds_configure(&ds_cfg_pentium_m, c);
			break;
		case 0xf:
		case 0x17: /* Core2 */
		case 0x1c: /* Atom */
			ds_configure(&ds_cfg_core2_atom, c);
			break;
		case 0x1a: /* Core i7 */
		default:
			/* Sorry, don't know about them. */
			break;
		}
		break;
	case 0xf:
		switch (c->x86_model) {
		case 0x0:
		case 0x1:
		case 0x2: /* Netburst */
			ds_configure(&ds_cfg_netburst, c);
			break;
		default:
			/* Sorry, don't know about them. */
			break;
		}
		break;
	default:
		/* Sorry, don't know about them. */
		break;
	}
}
static inline void ds_take_timestamp(struct ds_context *context,
				     enum bts_qualifier qualifier,
				     struct task_struct *task)
{
	struct bts_tracer *tracer = context->bts_master;
	struct bts_struct ts;

	/* Prevent compilers from reading the tracer pointer twice. */
	barrier();

	if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
		return;

	memset(&ts, 0, sizeof(ts));
	ts.qualifier = qualifier;
	ts.variant.event.clock = trace_clock_global();
	ts.variant.event.pid = task->pid;

	bts_write(tracer, &ts);
}
/*
 * Change the DS configuration from tracing prev to tracing next.
 */
void ds_switch_to(struct task_struct *prev, struct task_struct *next)
{
	struct ds_context *prev_ctx = prev->thread.ds_ctx;
	struct ds_context *next_ctx = next->thread.ds_ctx;
	unsigned long debugctlmsr = next->thread.debugctlmsr;

	/* Make sure all data is read before we start. */
	barrier();

	if (prev_ctx) {
		update_debugctlmsr(0);

		ds_take_timestamp(prev_ctx, bts_task_departs, prev);
	}

	if (next_ctx) {
		ds_take_timestamp(next_ctx, bts_task_arrives, next);

		wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
	}

	update_debugctlmsr(debugctlmsr);
}
void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
{
	clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
	tsk->thread.ds_ctx = NULL;
}

void ds_exit_thread(struct task_struct *tsk)
{
}
static __init int ds_selftest(void)
{
	if (ds_cfg.sizeof_rec[ds_bts]) {
		int error;

		error = ds_selftest_bts();
		if (error) {
			WARN(1, "[ds] selftest failed. disabling bts.\n");
			ds_cfg.sizeof_rec[ds_bts] = 0;
		}
	}

	if (ds_cfg.sizeof_rec[ds_pebs]) {
		int error;

		error = ds_selftest_pebs();
		if (error) {
			WARN(1, "[ds] selftest failed. disabling pebs.\n");
			ds_cfg.sizeof_rec[ds_pebs] = 0;
		}
	}

	return 0;
}
device_initcall(ds_selftest);