/*
 * Debug Store support
 *
 * This provides a low-level interface to the hardware's Debug Store
 * feature that is used for branch trace store (BTS) and
 * precise-event based sampling (PEBS).
 *
 * It manages:
 * - DS and BTS hardware configuration
 * - buffer overflow handling (to be done)
 * - buffer access
 *
 * It does not do:
 * - security checking (is the caller allowed to trace the task)
 * - buffer allocation (memory accounting)
 *
 *
 * Copyright (C) 2007-2009 Intel Corporation.
 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
 */

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/trace_clock.h>

#include <asm/ds.h>

#include "ds_selftest.h"

/*
 * The configuration for a particular DS hardware implementation:
 */
struct ds_configuration {
        /* The name of the configuration: */
        const char              *name;

        /* The size of pointer-typed fields in DS, BTS, and PEBS: */
        unsigned char           sizeof_ptr_field;

        /* The size of a BTS/PEBS record in bytes: */
        unsigned char           sizeof_rec[2];

        /* Control bit-masks indexed by enum ds_feature: */
        unsigned long           ctl[dsf_ctl_max];
};
static struct ds_configuration ds_cfg __read_mostly;

/* Maximal size of a DS configuration: */
#define MAX_SIZEOF_DS           (12 * 8)

/* Maximal size of a BTS record: */
#define MAX_SIZEOF_BTS          (3 * 8)

/* BTS and PEBS buffer alignment: */
#define DS_ALIGNMENT            (1 << 3)

/* Mask of control bits in the DS MSR register: */
#define BTS_CONTROL                             \
        ( ds_cfg.ctl[dsf_bts]                 | \
          ds_cfg.ctl[dsf_bts_kernel]          | \
          ds_cfg.ctl[dsf_bts_user]            | \
          ds_cfg.ctl[dsf_bts_overflow] )

/*
 * A BTS or PEBS tracer.
 *
 * This holds the configuration of the tracer and serves as a handle
 * to identify tracers.
 */
struct ds_tracer {
        /* The DS context (partially) owned by this tracer. */
        struct ds_context       *context;
        /* The buffer provided on ds_request() and its size in bytes. */
        void                    *buffer;
        size_t                  size;
};

struct bts_tracer {
        /* The common DS part: */
        struct ds_tracer        ds;

        /* The trace including the DS configuration: */
        struct bts_trace        trace;

        /* Buffer overflow notification function: */
        bts_ovfl_callback_t     ovfl;

        /* Active flags affecting trace collection. */
        unsigned int            flags;
};

struct pebs_tracer {
        /* The common DS part: */
        struct ds_tracer        ds;

        /* The trace including the DS configuration: */
        struct pebs_trace       trace;

        /* Buffer overflow notification function: */
        pebs_ovfl_callback_t    ovfl;
};

/*
 * Debug Store (DS) save area configuration (see Intel64 and IA32
 * Architectures Software Developer's Manual, section 18.5)
 *
 * The DS configuration consists of the following fields; different
 * architectures vary in the size of those fields.
 *
 * - double-word aligned base linear address of the BTS buffer
 * - write pointer into the BTS buffer
 * - end linear address of the BTS buffer (one byte beyond the end of
 *   the buffer)
 * - interrupt pointer into BTS buffer
 *   (interrupt occurs when write pointer passes interrupt pointer)
 * - double-word aligned base linear address of the PEBS buffer
 * - write pointer into the PEBS buffer
 * - end linear address of the PEBS buffer (one byte beyond the end of
 *   the buffer)
 * - interrupt pointer into PEBS buffer
 *   (interrupt occurs when write pointer passes interrupt pointer)
 * - value to which counter is reset following counter overflow
 *
 * Later architectures use 64bit pointers throughout, whereas earlier
 * architectures use 32bit pointers in 32bit mode.
 *
 *
 * We compute the base address for the first 8 fields based on:
 * - the field size stored in the DS configuration
 * - the relative field position
 * - an offset giving the start of the respective region
 *
 * This offset is further used to index various arrays holding
 * information for BTS and PEBS at the respective index.
 *
 * On later 32bit processors, we only access the lower 32bit of the
 * 64bit pointer fields. The upper halves will be zeroed out.
 */
enum ds_field {
        ds_buffer_base = 0,
        ds_index,
        ds_absolute_maximum,
        ds_interrupt_threshold,
};

enum ds_qualifier {
        ds_bts = 0,
        ds_pebs
};

static inline unsigned long
ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
{
        base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
        return *(unsigned long *)base;
}

static inline void
ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
       unsigned long value)
{
        base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
        (*(unsigned long *)base) = value;
}
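
/*
 * Worked example for the offset computation above (illustrative,
 * assuming 64bit fields, i.e. sizeof_ptr_field == 8): the PEBS write
 * pointer lives at
 *
 *      offset = 8 * (ds_index + (4 * ds_pebs)) = 8 * (1 + 4) = 40
 *
 * bytes into the DS save area, matching the field list above.
 */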

/*
 * Locking is done only for allocating BTS or PEBS resources.
 */
static DEFINE_SPINLOCK(ds_lock);

/*
 * We either support (system-wide) per-cpu or per-thread allocation.
 * We distinguish the two based on the task_struct pointer, where a
 * NULL pointer indicates per-cpu allocation for the current cpu.
 *
 * Allocations are use-counted. As soon as resources are allocated,
 * further allocations must be of the same type (per-cpu or
 * per-thread). We model this by counting allocations (i.e. the number
 * of tracers of a certain type) for one type negatively:
 *   =0  no tracers
 *   >0  number of per-thread tracers
 *   <0  number of per-cpu tracers
 *
 * Tracers essentially give the number of ds contexts for a certain
 * type of allocation.
 */
static atomic_t tracers = ATOMIC_INIT(0);

static inline int get_tracer(struct task_struct *task)
{
        int error;

        spin_lock_irq(&ds_lock);

        if (task) {
                error = -EPERM;
                if (atomic_read(&tracers) < 0)
                        goto out;
                atomic_inc(&tracers);
        } else {
                error = -EPERM;
                if (atomic_read(&tracers) > 0)
                        goto out;
                atomic_dec(&tracers);
        }

        error = 0;
out:
        spin_unlock_irq(&ds_lock);
        return error;
}

static inline void put_tracer(struct task_struct *task)
{
        if (task)
                atomic_dec(&tracers);
        else
                atomic_inc(&tracers);
}
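
/*
 * Example of the counting scheme above (illustrative): two per-thread
 * tracers leave tracers == 2; a subsequent per-cpu request then sees a
 * positive count in get_tracer() and fails with -EPERM. Only once both
 * per-thread tracers have been put (tracers == 0) can per-cpu tracing
 * be requested again.
 */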

/*
 * The DS context is either attached to a thread or to a cpu:
 * - in the former case, the thread_struct contains a pointer to the
 *   attached context.
 * - in the latter case, we use a static array of per-cpu context
 *   pointers.
 *
 * Contexts are use-counted. They are allocated on first access and
 * deallocated when the last user puts the context.
 */
struct ds_context {
        /* The DS configuration; goes into MSR_IA32_DS_AREA: */
        unsigned char           ds[MAX_SIZEOF_DS];

        /* The owner of the BTS and PEBS configuration, respectively: */
        struct bts_tracer       *bts_master;
        struct pebs_tracer      *pebs_master;

        /* Use count: */
        unsigned long           count;

        /* Pointer to the context pointer field: */
        struct ds_context       **this;

        /* The traced task; NULL for cpu tracing: */
        struct task_struct      *task;

        /* The traced cpu; only valid if task is NULL: */
        int                     cpu;
};

static DEFINE_PER_CPU(struct ds_context *, cpu_context);

static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
{
        struct ds_context **p_context =
                (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
        struct ds_context *context = NULL;
        struct ds_context *new_context = NULL;

        /* Chances are small that we already have a context. */
        new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
        if (!new_context)
                return NULL;

        spin_lock_irq(&ds_lock);

        context = *p_context;
        if (likely(!context)) {
                context = new_context;

                context->this = p_context;
                context->task = task;
                context->cpu = cpu;
                context->count = 0;

                *p_context = context;
        }

        context->count++;

        spin_unlock_irq(&ds_lock);

        if (context != new_context)
                kfree(new_context);

        return context;
}

static void ds_put_context(struct ds_context *context)
{
        struct task_struct *task;
        unsigned long irq;

        if (!context)
                return;

        spin_lock_irqsave(&ds_lock, irq);

        if (--context->count) {
                spin_unlock_irqrestore(&ds_lock, irq);
                return;
        }

        *(context->this) = NULL;

        task = context->task;

        if (task)
                clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);

        /*
         * We leave the (now dangling) pointer to the DS configuration in
         * the DS_AREA msr. This is as good or as bad as replacing it with
         * NULL - the hardware would crash if we enabled tracing.
         *
         * This saves us some problems with having to write an msr on a
         * different cpu while preventing others from doing the same for the
         * next context for that same cpu.
         */

        spin_unlock_irqrestore(&ds_lock, irq);

        /* The context might still be in use for context switching. */
        if (task && (task != current))
                wait_task_context_switch(task);

        kfree(context);
}

static void ds_install_ds_area(struct ds_context *context)
{
        unsigned long ds;

        ds = (unsigned long)context->ds;

        /*
         * There is a race between the bts master and the pebs master.
         *
         * The thread/cpu access is synchronized via get/put_cpu() for
         * task tracing and via wrmsr_on_cpu for cpu tracing.
         *
         * If bts and pebs are collected for the same task or same cpu,
         * the same configuration is written twice.
         */
        if (context->task) {
                get_cpu();
                if (context->task == current)
                        wrmsrl(MSR_IA32_DS_AREA, ds);
                set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
                put_cpu();
        } else
                wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
                             (u32)((u64)ds), (u32)((u64)ds >> 32));
}

/*
 * Call the tracer's callback on a buffer overflow.
 *
 * context: the ds context
 * qual:    the buffer type
 */
static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
{
        switch (qual) {
        case ds_bts:
                if (context->bts_master &&
                    context->bts_master->ovfl)
                        context->bts_master->ovfl(context->bts_master);
                break;
        case ds_pebs:
                if (context->pebs_master &&
                    context->pebs_master->ovfl)
                        context->pebs_master->ovfl(context->pebs_master);
                break;
        }
}

/*
 * Write raw data into the BTS or PEBS buffer.
 *
 * The remainder of any partially written record is zeroed out.
 *
 * context: the DS context
 * qual:    the buffer type
 * record:  the data to write
 * size:    the size of the data
 */
static int ds_write(struct ds_context *context, enum ds_qualifier qual,
                    const void *record, size_t size)
{
        int bytes_written = 0;

        if (!record)
                return -EINVAL;

        while (size) {
                unsigned long base, index, end, write_end, int_th;
                unsigned long write_size, adj_write_size;

                /*
                 * Write as much as possible without producing an
                 * overflow interrupt.
                 *
                 * Interrupt_threshold must either be
                 * - bigger than absolute_maximum or
                 * - point to a record between buffer_base and absolute_maximum
                 *
                 * Index points to a valid record.
                 */
                base   = ds_get(context->ds, qual, ds_buffer_base);
                index  = ds_get(context->ds, qual, ds_index);
                end    = ds_get(context->ds, qual, ds_absolute_maximum);
                int_th = ds_get(context->ds, qual, ds_interrupt_threshold);

                write_end = min(end, int_th);

                /*
                 * If we are already beyond the interrupt threshold,
                 * we fill the entire buffer.
                 */
                if (write_end <= index)
                        write_end = end;

                if (write_end <= index)
                        break;

                write_size = min((unsigned long) size, write_end - index);
                memcpy((void *)index, record, write_size);

                record = (const char *)record + write_size;
                size -= write_size;
                bytes_written += write_size;

                /*
                 * Round the written size up to the next record boundary
                 * so that the trailing bytes of a partially written
                 * record are zeroed out, as documented above.
                 */
                adj_write_size = DIV_ROUND_UP(write_size,
                                              ds_cfg.sizeof_rec[qual]);
                adj_write_size *= ds_cfg.sizeof_rec[qual];

                /* Zero out trailing bytes. */
                memset((char *)index + write_size, 0,
                       adj_write_size - write_size);
                index += adj_write_size;

                if (index >= end)
                        index = base;
                ds_set(context->ds, qual, ds_index, index);

                if (index >= int_th)
                        ds_overflow(context, qual);
        }

        return bytes_written;
}
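
/*
 * Wrap-around example for ds_write() (illustrative): with a 10-record
 * buffer and the index at the last record, writing one record copies
 * it into that last slot and wraps the index back to buffer_base, so
 * the following write overwrites the oldest record.
 */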

/*
 * Branch Trace Store (BTS) uses the following format. Different
 * architectures vary in the size of those fields.
 * - source linear address
 * - destination linear address
 * - flags
 *
 * Later architectures use 64bit pointers throughout, whereas earlier
 * architectures use 32bit pointers in 32bit mode.
 *
 * We compute the base address for the fields based on:
 * - the field size stored in the DS configuration
 * - the relative field position
 *
 * In order to store additional information in the BTS buffer, we use
 * a special source address to indicate that the record requires
 * special interpretation.
 *
 * Netburst indicated via a bit in the flags field whether the branch
 * was predicted; this is ignored.
 *
 * We use two levels of abstraction:
 * - the raw data level defined here
 * - an arch-independent level defined in ds.h
 */
enum bts_field {
        bts_from,
        bts_to,
        bts_flags,

        bts_qual        = bts_from,
        bts_clock       = bts_to,
        bts_pid         = bts_flags,

        bts_qual_mask   = (bts_qual_max - 1),
        bts_escape      = ((unsigned long)-1 & ~bts_qual_mask)
};

static inline unsigned long bts_get(const char *base, enum bts_field field)
{
        base += (ds_cfg.sizeof_ptr_field * field);
        return *(unsigned long *)base;
}

static inline void bts_set(char *base, enum bts_field field, unsigned long val)
{
        base += (ds_cfg.sizeof_ptr_field * field);
        (*(unsigned long *)base) = val;
}
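
/*
 * Example of the escape encoding above (illustrative): a
 * bts_task_arrives event is stored with
 *
 *      from  (bts_qual)  = bts_escape | bts_task_arrives
 *      to    (bts_clock) = timestamp
 *      flags (bts_pid)   = pid
 *
 * Readers recognize the record because the upper bits of the source
 * address match bts_escape, which no ordinary branch source does.
 */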

/*
 * The raw BTS data is architecture dependent.
 *
 * For higher-level users, we give an arch-independent view.
 * - ds.h defines struct bts_struct
 * - bts_read translates one raw bts record into a bts_struct
 * - bts_write translates one bts_struct into the raw format and
 *   writes it into the top of the parameter tracer's buffer.
 *
 * return: bytes read/written on success; -Eerrno, otherwise
 */
static int
bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
{
        if (!tracer)
                return -EINVAL;

        if (at < tracer->trace.ds.begin)
                return -EINVAL;

        if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
                return -EINVAL;

        memset(out, 0, sizeof(*out));
        if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
                out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
                out->variant.event.clock = bts_get(at, bts_clock);
                out->variant.event.pid = bts_get(at, bts_pid);
        } else {
                out->qualifier = bts_branch;
                out->variant.lbr.from = bts_get(at, bts_from);
                out->variant.lbr.to   = bts_get(at, bts_to);

                if (!out->variant.lbr.from && !out->variant.lbr.to)
                        out->qualifier = bts_invalid;
        }

        return ds_cfg.sizeof_rec[ds_bts];
}

static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
{
        unsigned char raw[MAX_SIZEOF_BTS];

        if (!tracer)
                return -EINVAL;

        if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
                return -EOVERFLOW;

        switch (in->qualifier) {
        case bts_invalid:
                bts_set(raw, bts_from, 0);
                bts_set(raw, bts_to, 0);
                bts_set(raw, bts_flags, 0);
                break;
        case bts_branch:
                bts_set(raw, bts_from, in->variant.lbr.from);
                bts_set(raw, bts_to,   in->variant.lbr.to);
                bts_set(raw, bts_flags, 0);
                break;
        case bts_task_arrives:
        case bts_task_departs:
                bts_set(raw, bts_qual, (bts_escape | in->qualifier));
                bts_set(raw, bts_clock, in->variant.event.clock);
                bts_set(raw, bts_pid, in->variant.event.pid);
                break;
        default:
                return -EINVAL;
        }

        return ds_write(tracer->ds.context, ds_bts, raw,
                        ds_cfg.sizeof_rec[ds_bts]);
}

static void ds_write_config(struct ds_context *context,
                            struct ds_trace *cfg, enum ds_qualifier qual)
{
        unsigned char *ds = context->ds;

        ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
        ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
        ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
        ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
}

static void ds_read_config(struct ds_context *context,
                           struct ds_trace *cfg, enum ds_qualifier qual)
{
        unsigned char *ds = context->ds;

        cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
        cfg->top = (void *)ds_get(ds, qual, ds_index);
        cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
        cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
}

static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
                             void *base, size_t size, size_t ith,
                             unsigned int flags)
{
        unsigned long buffer, adj;

        /*
         * Adjust the buffer address and size to meet alignment
         * constraints:
         * - buffer is double-word aligned
         * - size is multiple of record size
         *
         * We checked the size at the very beginning; we have enough
         * space to do the adjustment.
         */
        buffer = (unsigned long)base;

        adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
        buffer += adj;
        size   -= adj;

        trace->n = size / ds_cfg.sizeof_rec[qual];
        trace->size = ds_cfg.sizeof_rec[qual];

        size = (trace->n * trace->size);

        trace->begin = (void *)buffer;
        trace->top = trace->begin;
        trace->end = (void *)(buffer + size);
        /*
         * The value for 'no threshold' is -1, which will set the
         * threshold outside of the buffer, just like we want it.
         */
        ith *= ds_cfg.sizeof_rec[qual];
        trace->ith = (void *)(buffer + size - ith);

        trace->flags = flags;
}
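
/*
 * Worked example for the adjustment above (illustrative, 24-byte BTS
 * records): base = 0x1003 and size = 1024 yield adj = 5, so the buffer
 * starts at 0x1008 with 1019 usable bytes; n = 1019 / 24 = 42 records,
 * and size is trimmed to 42 * 24 = 1008 bytes. With ith = 2, the
 * interrupt threshold ends up 48 bytes before trace->end.
 */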

static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
                      enum ds_qualifier qual, struct task_struct *task,
                      int cpu, void *base, size_t size, size_t th)
{
        struct ds_context *context;
        int error;
        size_t req_size;

        error = -EOPNOTSUPP;
        if (!ds_cfg.sizeof_rec[qual])
                goto out;

        error = -EINVAL;
        if (!base)
                goto out;

        req_size = ds_cfg.sizeof_rec[qual];
        /* We might need space for alignment adjustments. */
        if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
                req_size += DS_ALIGNMENT;

        error = -EINVAL;
        if (size < req_size)
                goto out;

        if (th != (size_t)-1) {
                th *= ds_cfg.sizeof_rec[qual];

                error = -EINVAL;
                if (size <= th)
                        goto out;
        }

        tracer->buffer = base;
        tracer->size = size;

        error = -ENOMEM;
        context = ds_get_context(task, cpu);
        if (!context)
                goto out;
        tracer->context = context;

        /*
         * Defer any tracer-specific initialization work for the context until
         * context ownership has been clarified.
         */

        error = 0;
out:
        return error;
}

static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
                                         void *base, size_t size,
                                         bts_ovfl_callback_t ovfl, size_t th,
                                         unsigned int flags)
{
        struct bts_tracer *tracer;
        int error;

        /* Buffer overflow notification is not yet implemented. */
        error = -EOPNOTSUPP;
        if (ovfl)
                goto out;

        error = get_tracer(task);
        if (error < 0)
                goto out;

        error = -ENOMEM;
        tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
        if (!tracer)
                goto out_put_tracer;
        tracer->ovfl = ovfl;

        /* Do some more error checking and acquire a tracing context. */
        error = ds_request(&tracer->ds, &tracer->trace.ds,
                           ds_bts, task, cpu, base, size, th);
        if (error < 0)
                goto out_tracer;

        /* Claim the bts part of the tracing context we acquired above. */
        spin_lock_irq(&ds_lock);

        error = -EPERM;
        if (tracer->ds.context->bts_master)
                goto out_unlock;
        tracer->ds.context->bts_master = tracer;

        spin_unlock_irq(&ds_lock);

        /*
         * Now that we own the bts part of the context, let's complete the
         * initialization for that part.
         */
        ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
        ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
        ds_install_ds_area(tracer->ds.context);

        tracer->trace.read  = bts_read;
        tracer->trace.write = bts_write;

        /* Start tracing. */
        ds_resume_bts(tracer);

        return tracer;

out_unlock:
        spin_unlock_irq(&ds_lock);
        ds_put_context(tracer->ds.context);
out_tracer:
        kfree(tracer);
out_put_tracer:
        put_tracer(task);
out:
        return ERR_PTR(error);
}

struct bts_tracer *ds_request_bts_task(struct task_struct *task,
                                       void *base, size_t size,
                                       bts_ovfl_callback_t ovfl,
                                       size_t th, unsigned int flags)
{
        return ds_request_bts(task, 0, base, size, ovfl, th, flags);
}

struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
                                      bts_ovfl_callback_t ovfl,
                                      size_t th, unsigned int flags)
{
        return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
}
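
/*
 * Minimal usage sketch for the request/release API (illustrative only;
 * buffer size and flags are arbitrary, error handling abbreviated).
 * ovfl must be NULL (overflow notification is not yet implemented) and
 * th == (size_t)-1 requests no interrupt threshold:
 *
 *      unsigned char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
 *      struct bts_tracer *tracer;
 *
 *      tracer = ds_request_bts_task(task, buf, PAGE_SIZE, NULL,
 *                                   (size_t)-1, BTS_KERNEL | BTS_USER);
 *      if (IS_ERR(tracer))
 *              return PTR_ERR(tracer);
 *      ...
 *      ds_release_bts(tracer);
 *      kfree(buf);
 *
 * The caller owns the buffer; ds_release_bts() stops tracing and drops
 * the context, but does not free the buffer.
 */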

static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
                                           void *base, size_t size,
                                           pebs_ovfl_callback_t ovfl, size_t th,
                                           unsigned int flags)
{
        struct pebs_tracer *tracer;
        int error;

        /* Buffer overflow notification is not yet implemented. */
        error = -EOPNOTSUPP;
        if (ovfl)
                goto out;

        error = get_tracer(task);
        if (error < 0)
                goto out;

        error = -ENOMEM;
        tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
        if (!tracer)
                goto out_put_tracer;
        tracer->ovfl = ovfl;

        /* Do some more error checking and acquire a tracing context. */
        error = ds_request(&tracer->ds, &tracer->trace.ds,
                           ds_pebs, task, cpu, base, size, th);
        if (error < 0)
                goto out_tracer;

        /* Claim the pebs part of the tracing context we acquired above. */
        spin_lock_irq(&ds_lock);

        error = -EPERM;
        if (tracer->ds.context->pebs_master)
                goto out_unlock;
        tracer->ds.context->pebs_master = tracer;

        spin_unlock_irq(&ds_lock);

        /*
         * Now that we own the pebs part of the context, let's complete the
         * initialization for that part.
         */
        ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
        ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
        ds_install_ds_area(tracer->ds.context);

        /* Start tracing. */
        ds_resume_pebs(tracer);

        return tracer;

out_unlock:
        spin_unlock_irq(&ds_lock);
        ds_put_context(tracer->ds.context);
out_tracer:
        kfree(tracer);
out_put_tracer:
        put_tracer(task);
out:
        return ERR_PTR(error);
}

struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
                                         void *base, size_t size,
                                         pebs_ovfl_callback_t ovfl,
                                         size_t th, unsigned int flags)
{
        return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
}

struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
                                        pebs_ovfl_callback_t ovfl,
                                        size_t th, unsigned int flags)
{
        return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
}

static void ds_free_bts(struct bts_tracer *tracer)
{
        struct task_struct *task;

        task = tracer->ds.context->task;

        WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
        tracer->ds.context->bts_master = NULL;

        /* Make sure tracing stopped and the tracer is not in use. */
        if (task && (task != current))
                wait_task_context_switch(task);

        ds_put_context(tracer->ds.context);
        put_tracer(task);

        kfree(tracer);
}

void ds_release_bts(struct bts_tracer *tracer)
{
        might_sleep();

        if (!tracer)
                return;

        ds_suspend_bts(tracer);
        ds_free_bts(tracer);
}

int ds_release_bts_noirq(struct bts_tracer *tracer)
{
        struct task_struct *task;
        unsigned long irq;
        int error;

        if (!tracer)
                return 0;

        task = tracer->ds.context->task;

        local_irq_save(irq);

        error = -EPERM;
        if (!task &&
            (tracer->ds.context->cpu != smp_processor_id()))
                goto out;

        error = -EPERM;
        if (task && (task != current))
                goto out;

        ds_suspend_bts_noirq(tracer);
        ds_free_bts(tracer);

        error = 0;
out:
        local_irq_restore(irq);
        return error;
}

static void update_task_debugctlmsr(struct task_struct *task,
                                    unsigned long debugctlmsr)
{
        task->thread.debugctlmsr = debugctlmsr;

        get_cpu();
        if (task == current)
                update_debugctlmsr(debugctlmsr);
        put_cpu();
}

void ds_suspend_bts(struct bts_tracer *tracer)
{
        struct task_struct *task;
        unsigned long debugctlmsr;
        int cpu;

        if (!tracer)
                return;

        tracer->flags = 0;

        task = tracer->ds.context->task;
        cpu  = tracer->ds.context->cpu;

        WARN_ON(!task && irqs_disabled());

        debugctlmsr = (task ?
                       task->thread.debugctlmsr :
                       get_debugctlmsr_on_cpu(cpu));
        debugctlmsr &= ~BTS_CONTROL;

        if (task)
                update_task_debugctlmsr(task, debugctlmsr);
        else
                update_debugctlmsr_on_cpu(cpu, debugctlmsr);
}

int ds_suspend_bts_noirq(struct bts_tracer *tracer)
{
        struct task_struct *task;
        unsigned long debugctlmsr, irq;
        int cpu, error = 0;

        if (!tracer)
                return 0;

        tracer->flags = 0;

        task = tracer->ds.context->task;
        cpu  = tracer->ds.context->cpu;

        local_irq_save(irq);

        error = -EPERM;
        if (!task && (cpu != smp_processor_id()))
                goto out;

        debugctlmsr = (task ?
                       task->thread.debugctlmsr :
                       get_debugctlmsr());
        debugctlmsr &= ~BTS_CONTROL;

        if (task)
                update_task_debugctlmsr(task, debugctlmsr);
        else
                update_debugctlmsr(debugctlmsr);

        error = 0;
out:
        local_irq_restore(irq);
        return error;
}

static unsigned long ds_bts_control(struct bts_tracer *tracer)
{
        unsigned long control;

        control = ds_cfg.ctl[dsf_bts];
        if (!(tracer->trace.ds.flags & BTS_KERNEL))
                control |= ds_cfg.ctl[dsf_bts_kernel];
        if (!(tracer->trace.ds.flags & BTS_USER))
                control |= ds_cfg.ctl[dsf_bts_user];

        return control;
}

void ds_resume_bts(struct bts_tracer *tracer)
{
        struct task_struct *task;
        unsigned long debugctlmsr;
        int cpu;

        if (!tracer)
                return;

        tracer->flags = tracer->trace.ds.flags;

        task = tracer->ds.context->task;
        cpu  = tracer->ds.context->cpu;

        WARN_ON(!task && irqs_disabled());

        debugctlmsr = (task ?
                       task->thread.debugctlmsr :
                       get_debugctlmsr_on_cpu(cpu));
        debugctlmsr |= ds_bts_control(tracer);

        if (task)
                update_task_debugctlmsr(task, debugctlmsr);
        else
                update_debugctlmsr_on_cpu(cpu, debugctlmsr);
}

int ds_resume_bts_noirq(struct bts_tracer *tracer)
{
        struct task_struct *task;
        unsigned long debugctlmsr, irq;
        int cpu, error = 0;

        if (!tracer)
                return 0;

        tracer->flags = tracer->trace.ds.flags;

        task = tracer->ds.context->task;
        cpu  = tracer->ds.context->cpu;

        local_irq_save(irq);

        error = -EPERM;
        if (!task && (cpu != smp_processor_id()))
                goto out;

        debugctlmsr = (task ?
                       task->thread.debugctlmsr :
                       get_debugctlmsr());
        debugctlmsr |= ds_bts_control(tracer);

        if (task)
                update_task_debugctlmsr(task, debugctlmsr);
        else
                update_debugctlmsr(debugctlmsr);

        error = 0;
out:
        local_irq_restore(irq);
        return error;
}

static void ds_free_pebs(struct pebs_tracer *tracer)
{
        struct task_struct *task;

        task = tracer->ds.context->task;

        WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
        tracer->ds.context->pebs_master = NULL;

        ds_put_context(tracer->ds.context);
        put_tracer(task);

        kfree(tracer);
}

void ds_release_pebs(struct pebs_tracer *tracer)
{
        might_sleep();

        if (!tracer)
                return;

        ds_suspend_pebs(tracer);
        ds_free_pebs(tracer);
}

int ds_release_pebs_noirq(struct pebs_tracer *tracer)
{
        struct task_struct *task;
        unsigned long irq;
        int error;

        if (!tracer)
                return 0;

        task = tracer->ds.context->task;

        local_irq_save(irq);

        error = -EPERM;
        if (!task &&
            (tracer->ds.context->cpu != smp_processor_id()))
                goto out;

        error = -EPERM;
        if (task && (task != current))
                goto out;

        ds_suspend_pebs_noirq(tracer);
        ds_free_pebs(tracer);

        error = 0;
out:
        local_irq_restore(irq);
        return error;
}

void ds_suspend_pebs(struct pebs_tracer *tracer)
{
}

int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
{
        return 0;
}

void ds_resume_pebs(struct pebs_tracer *tracer)
{
}

int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
{
        return 0;
}

const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
{
        if (!tracer)
                return NULL;

        ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);

        return &tracer->trace;
}

const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
{
        if (!tracer)
                return NULL;

        ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
        tracer->trace.reset_value =
                *(u64 *)(tracer->ds.context->ds +
                         (ds_cfg.sizeof_ptr_field * 8));

        return &tracer->trace;
}

int ds_reset_bts(struct bts_tracer *tracer)
{
        if (!tracer)
                return -EINVAL;

        tracer->trace.ds.top = tracer->trace.ds.begin;

        ds_set(tracer->ds.context->ds, ds_bts, ds_index,
               (unsigned long)tracer->trace.ds.top);

        return 0;
}

int ds_reset_pebs(struct pebs_tracer *tracer)
{
        if (!tracer)
                return -EINVAL;

        tracer->trace.ds.top = tracer->trace.ds.begin;

        ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
               (unsigned long)tracer->trace.ds.top);

        return 0;
}

int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
{
        if (!tracer)
                return -EINVAL;

        *(u64 *)(tracer->ds.context->ds +
                 (ds_cfg.sizeof_ptr_field * 8)) = value;

        return 0;
}

static const struct ds_configuration ds_cfg_netburst = {
        .name                   = "Netburst",
        .ctl[dsf_bts]           = (1 << 2) | (1 << 3),
        .ctl[dsf_bts_kernel]    = (1 << 5),
        .ctl[dsf_bts_user]      = (1 << 6),
};
static const struct ds_configuration ds_cfg_pentium_m = {
        .name                   = "Pentium M",
        .ctl[dsf_bts]           = (1 << 6) | (1 << 7),
};
static const struct ds_configuration ds_cfg_core2_atom = {
        .name                   = "Core 2/Atom",
        .ctl[dsf_bts]           = (1 << 6) | (1 << 7),
        .ctl[dsf_bts_kernel]    = (1 << 9),
        .ctl[dsf_bts_user]      = (1 << 10),
};

static void
ds_configure(const struct ds_configuration *cfg,
             struct cpuinfo_x86 *cpu)
{
        unsigned long nr_pebs_fields = 0;

        printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);

#ifdef __i386__
        nr_pebs_fields = 10;
#else
        nr_pebs_fields = 18;
#endif

        memset(&ds_cfg, 0, sizeof(ds_cfg));
        ds_cfg = *cfg;

        ds_cfg.sizeof_ptr_field =
                (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);

        ds_cfg.sizeof_rec[ds_bts]  = ds_cfg.sizeof_ptr_field * 3;
        ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;

        if (!cpu_has(cpu, X86_FEATURE_BTS)) {
                ds_cfg.sizeof_rec[ds_bts] = 0;
                printk(KERN_INFO "[ds] bts not available\n");
        }
        if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
                ds_cfg.sizeof_rec[ds_pebs] = 0;
                printk(KERN_INFO "[ds] pebs not available\n");
        }

        printk(KERN_INFO "[ds] sizes: address: %u bit, ",
               8 * ds_cfg.sizeof_ptr_field);
        printk("bts/pebs record: %u/%u bytes\n",
               ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);

        WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_ptr_field));
}
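
/*
 * Example of the resulting sizes (illustrative): a 64bit kernel
 * (18 PEBS fields) on a DTES64-capable part uses 8-byte fields, giving
 * 24-byte BTS records and 8 * 18 = 144-byte PEBS records; a 32bit
 * kernel without DTES64 uses 4-byte fields and 10 PEBS fields, giving
 * 12 and 40 bytes, respectively.
 */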

void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
{
        /* Only configure the first cpu. Others are identical. */
        if (ds_cfg.name)
                return;

        switch (c->x86) {
        case 0x6:
                switch (c->x86_model) {
                case 0x9:
                case 0xd: /* Pentium M */
                        ds_configure(&ds_cfg_pentium_m, c);
                        break;
                case 0xf:
                case 0x17: /* Core2 */
                case 0x1c: /* Atom */
                        ds_configure(&ds_cfg_core2_atom, c);
                        break;
                case 0x1a: /* Core i7 */
                default:
                        /* Sorry, don't know about them. */
                        break;
                }
                break;
        case 0xf:
                switch (c->x86_model) {
                case 0x0:
                case 0x1:
                case 0x2: /* Netburst */
                        ds_configure(&ds_cfg_netburst, c);
                        break;
                default:
                        /* Sorry, don't know about them. */
                        break;
                }
                break;
        default:
                /* Sorry, don't know about them. */
                break;
        }
}

static inline void ds_take_timestamp(struct ds_context *context,
                                     enum bts_qualifier qualifier,
                                     struct task_struct *task)
{
        struct bts_tracer *tracer = context->bts_master;
        struct bts_struct ts;

        /* Prevent compilers from reading the tracer pointer twice. */
        barrier();

        if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
                return;

        memset(&ts, 0, sizeof(ts));
        ts.qualifier            = qualifier;
        ts.variant.event.clock  = trace_clock_global();
        ts.variant.event.pid    = task->pid;

        bts_write(tracer, &ts);
}

/*
 * Change the DS configuration from tracing prev to tracing next.
 */
void ds_switch_to(struct task_struct *prev, struct task_struct *next)
{
        struct ds_context *prev_ctx     = prev->thread.ds_ctx;
        struct ds_context *next_ctx     = next->thread.ds_ctx;
        unsigned long debugctlmsr       = next->thread.debugctlmsr;

        /* Make sure all data is read before we start. */
        barrier();

        if (prev_ctx) {
                update_debugctlmsr(0);

                ds_take_timestamp(prev_ctx, bts_task_departs, prev);
        }

        if (next_ctx) {
                ds_take_timestamp(next_ctx, bts_task_arrives, next);

                wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
        }

        update_debugctlmsr(debugctlmsr);
}

static __init int ds_selftest(void)
{
        if (ds_cfg.sizeof_rec[ds_bts]) {
                int error;

                error = ds_selftest_bts();
                if (error) {
                        WARN(1, "[ds] selftest failed. disabling bts.\n");
                        ds_cfg.sizeof_rec[ds_bts] = 0;
                }
        }

        if (ds_cfg.sizeof_rec[ds_pebs]) {
                int error;

                error = ds_selftest_pebs();
                if (error) {
                        WARN(1, "[ds] selftest failed. disabling pebs.\n");
                        ds_cfg.sizeof_rec[ds_pebs] = 0;
                }
        }

        return 0;
}
device_initcall(ds_selftest);