ring_buffer.c 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694
  1. /*
  2. * Generic ring buffer
  3. *
  4. * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  5. */
  6. #include <linux/ring_buffer.h>
  7. #include <linux/spinlock.h>
  8. #include <linux/debugfs.h>
  9. #include <linux/uaccess.h>
  10. #include <linux/module.h>
  11. #include <linux/percpu.h>
  12. #include <linux/mutex.h>
  13. #include <linux/sched.h> /* used for sched_clock() (for now) */
  14. #include <linux/init.h>
  15. #include <linux/hash.h>
  16. #include <linux/list.h>
  17. #include <linux/fs.h>
/*
 * Debug knob: raise this above 0 to test the TIME_EXTEND events and the
 * time stamp normalization.  ring_buffer_time_stamp() shifts the clock
 * left by this many bits and ring_buffer_normalize_time_stamp() shifts
 * it back to the right.
 */
#define DEBUG_SHIFT 0
  20. /* FIXME!!! */
  21. u64 ring_buffer_time_stamp(int cpu)
  22. {
  23. /* shift to debug/test normalization and TIME_EXTENTS */
  24. return sched_clock() << DEBUG_SHIFT;
  25. }
  26. void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
  27. {
  28. /* Just stupid testing the normalize function and deltas */
  29. *ts >>= DEBUG_SHIFT;
  30. }
/* Size of the event header; rb_event_length() adds it to the payload size. */
#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
/* The event len field counts units of (1 << RB_ALIGNMENT_SHIFT) bytes. */
#define RB_ALIGNMENT_SHIFT 2
#define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT)
/*
 * Largest payload (in bytes) whose size is recorded in the event len
 * field.  Anything bigger sets len to 0 and records the size in array[0]
 * (see rb_update_event()).
 */
#define RB_MAX_SMALL_DATA 28

/* Event lengths, in bytes, that rb_event_length() reports for time events. */
enum {
	RB_LEN_TIME_EXTEND = 8,
	RB_LEN_TIME_STAMP = 16,
};
  39. /* inline for ring buffer fast paths */
  40. static inline unsigned
  41. rb_event_length(struct ring_buffer_event *event)
  42. {
  43. unsigned length;
  44. switch (event->type) {
  45. case RINGBUF_TYPE_PADDING:
  46. /* undefined */
  47. return -1;
  48. case RINGBUF_TYPE_TIME_EXTEND:
  49. return RB_LEN_TIME_EXTEND;
  50. case RINGBUF_TYPE_TIME_STAMP:
  51. return RB_LEN_TIME_STAMP;
  52. case RINGBUF_TYPE_DATA:
  53. if (event->len)
  54. length = event->len << RB_ALIGNMENT_SHIFT;
  55. else
  56. length = event->array[0];
  57. return length + RB_EVNT_HDR_SIZE;
  58. default:
  59. BUG();
  60. }
  61. /* not hit */
  62. return 0;
  63. }
  64. /**
  65. * ring_buffer_event_length - return the length of the event
  66. * @event: the event to get the length of
  67. */
  68. unsigned ring_buffer_event_length(struct ring_buffer_event *event)
  69. {
  70. return rb_event_length(event);
  71. }
  72. /* inline for ring buffer fast paths */
  73. static inline void *
  74. rb_event_data(struct ring_buffer_event *event)
  75. {
  76. BUG_ON(event->type != RINGBUF_TYPE_DATA);
  77. /* If length is in len field, then array[0] has the data */
  78. if (event->len)
  79. return (void *)&event->array[0];
  80. /* Otherwise length is in array[0] and array[1] has the data */
  81. return (void *)&event->array[1];
  82. }
  83. /**
  84. * ring_buffer_event_data - return the data of the event
  85. * @event: the event to get the data from
  86. */
  87. void *ring_buffer_event_data(struct ring_buffer_event *event)
  88. {
  89. return rb_event_data(event);
  90. }
  91. #define for_each_buffer_cpu(buffer, cpu) \
  92. for_each_cpu_mask(cpu, buffer->cpumask)
/*
 * A time delta must fit in TS_SHIFT bits.  TS_MASK selects those low
 * bits; TS_DELTA_TEST selects every bit above them and is what
 * test_time_stamp() uses to detect a delta that is too big.
 */
#define TS_SHIFT 27
#define TS_MASK ((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST (~TS_MASK)
/*
 * This hack stolen from mm/slob.c.
 * We can store per page timing information in the page frame of the page.
 * Thanks to Peter Zijlstra for suggesting this idea.
 *
 * The anonymous struct shares storage with struct page through the
 * union, so field order and sizes matter.  ring_buffer_alloc() checks
 * that this struct is not bigger than struct page.
 */
struct buffer_page {
	union {
		struct {
			unsigned long flags; /* mandatory */
			atomic_t _count; /* mandatory */
			u64 time_stamp; /* page time stamp */
			unsigned size; /* size of page data */
			struct list_head list; /* list of free pages */
		};
		struct page page;
	};
};
  113. /*
  114. * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
  115. * this issue out.
  116. */
  117. static inline void free_buffer_page(struct buffer_page *bpage)
  118. {
  119. reset_page_mapcount(&bpage->page);
  120. bpage->page.mapping = NULL;
  121. __free_page(&bpage->page);
  122. }
  123. /*
  124. * We need to fit the time_stamp delta into 27 bits.
  125. */
  126. static inline int test_time_stamp(u64 delta)
  127. {
  128. if (delta & TS_DELTA_TEST)
  129. return 1;
  130. return 0;
  131. }
/* Number of bytes of event data that one buffer page can hold. */
#define BUF_PAGE_SIZE PAGE_SIZE
/*
 * Per CPU state of a ring buffer.
 *
 * head_page == tail_page && head == tail then buffer is empty.
 */
struct ring_buffer_per_cpu {
	int cpu; /* CPU this buffer belongs to */
	struct ring_buffer *buffer; /* owning ring buffer */
	spinlock_t lock; /* held from reserve until commit */
	struct lock_class_key lock_key;
	struct list_head pages; /* list head of the buffer pages */
	unsigned long head; /* read from head */
	unsigned long tail; /* write to tail */
	struct buffer_page *head_page; /* page that head indexes into */
	struct buffer_page *tail_page; /* page that tail indexes into */
	unsigned long overrun; /* data events dropped by overwrite */
	unsigned long entries; /* data events committed and not overrun */
	u64 write_stamp; /* time stamp of the last write */
	u64 read_stamp; /* time stamp base for reading */
	atomic_t record_disabled; /* non-zero: writers are rejected */
};
/*
 * Top level ring buffer descriptor: one ring_buffer_per_cpu for each CPU
 * set in cpumask.
 */
struct ring_buffer {
	unsigned long size;
	unsigned pages; /* number of pages in each per CPU buffer */
	unsigned flags; /* RB_FL_* attributes passed to ring_buffer_alloc() */
	int cpus; /* set to nr_cpu_ids at allocation */
	cpumask_t cpumask; /* CPUs that have a per CPU buffer */
	atomic_t record_disabled; /* non-zero: writers are rejected */
	struct mutex mutex; /* taken by ring_buffer_resize() */
	struct ring_buffer_per_cpu **buffers; /* indexed by CPU number */
};
/*
 * Read position within one per CPU buffer, kept separately from the
 * buffer's own head/head_page/read_stamp.
 */
struct ring_buffer_iter {
	struct ring_buffer_per_cpu *cpu_buffer; /* buffer being iterated */
	unsigned long head; /* byte offset into head_page */
	struct buffer_page *head_page; /* page currently being read */
	u64 read_stamp; /* loaded from the page time stamp on page reset */
};
  168. #define RB_WARN_ON(buffer, cond) \
  169. if (unlikely(cond)) { \
  170. atomic_inc(&buffer->record_disabled); \
  171. WARN_ON(1); \
  172. return -1; \
  173. }
  174. /**
  175. * check_pages - integrity check of buffer pages
  176. * @cpu_buffer: CPU buffer with pages to test
  177. *
  178. * As a safty measure we check to make sure the data pages have not
  179. * been corrupted.
  180. */
  181. static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
  182. {
  183. struct list_head *head = &cpu_buffer->pages;
  184. struct buffer_page *page, *tmp;
  185. RB_WARN_ON(cpu_buffer, head->next->prev != head);
  186. RB_WARN_ON(cpu_buffer, head->prev->next != head);
  187. list_for_each_entry_safe(page, tmp, head, list) {
  188. RB_WARN_ON(cpu_buffer, page->list.next->prev != &page->list);
  189. RB_WARN_ON(cpu_buffer, page->list.prev->next != &page->list);
  190. }
  191. return 0;
  192. }
  193. static unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
  194. {
  195. return cpu_buffer->head_page->size;
  196. }
  197. static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
  198. unsigned nr_pages)
  199. {
  200. struct list_head *head = &cpu_buffer->pages;
  201. struct buffer_page *page, *tmp;
  202. unsigned long addr;
  203. LIST_HEAD(pages);
  204. unsigned i;
  205. for (i = 0; i < nr_pages; i++) {
  206. addr = __get_free_page(GFP_KERNEL);
  207. if (!addr)
  208. goto free_pages;
  209. page = (struct buffer_page *)virt_to_page(addr);
  210. list_add(&page->list, &pages);
  211. }
  212. list_splice(&pages, head);
  213. rb_check_pages(cpu_buffer);
  214. return 0;
  215. free_pages:
  216. list_for_each_entry_safe(page, tmp, &pages, list) {
  217. list_del_init(&page->list);
  218. free_buffer_page(page);
  219. }
  220. return -ENOMEM;
  221. }
  222. static struct ring_buffer_per_cpu *
  223. rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
  224. {
  225. struct ring_buffer_per_cpu *cpu_buffer;
  226. int ret;
  227. cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
  228. GFP_KERNEL, cpu_to_node(cpu));
  229. if (!cpu_buffer)
  230. return NULL;
  231. cpu_buffer->cpu = cpu;
  232. cpu_buffer->buffer = buffer;
  233. spin_lock_init(&cpu_buffer->lock);
  234. INIT_LIST_HEAD(&cpu_buffer->pages);
  235. ret = rb_allocate_pages(cpu_buffer, buffer->pages);
  236. if (ret < 0)
  237. goto fail_free_buffer;
  238. cpu_buffer->head_page
  239. = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
  240. cpu_buffer->tail_page
  241. = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
  242. return cpu_buffer;
  243. fail_free_buffer:
  244. kfree(cpu_buffer);
  245. return NULL;
  246. }
  247. static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
  248. {
  249. struct list_head *head = &cpu_buffer->pages;
  250. struct buffer_page *page, *tmp;
  251. list_for_each_entry_safe(page, tmp, head, list) {
  252. list_del_init(&page->list);
  253. free_buffer_page(page);
  254. }
  255. kfree(cpu_buffer);
  256. }
/*
 * Causes build errors if the struct buffer_page gets bigger
 * than the struct page.
 *
 * ring_buffer_alloc() only calls this when
 * sizeof(struct buffer_page) > sizeof(struct page).
 * NOTE(review): presumably this function is never defined, so the build
 * breaks only if that call is not optimized away -- confirm.
 */
extern int ring_buffer_page_too_big(void);
  262. /**
  263. * ring_buffer_alloc - allocate a new ring_buffer
  264. * @size: the size in bytes that is needed.
  265. * @flags: attributes to set for the ring buffer.
  266. *
  267. * Currently the only flag that is available is the RB_FL_OVERWRITE
  268. * flag. This flag means that the buffer will overwrite old data
  269. * when the buffer wraps. If this flag is not set, the buffer will
  270. * drop data when the tail hits the head.
  271. */
  272. struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
  273. {
  274. struct ring_buffer *buffer;
  275. int bsize;
  276. int cpu;
  277. /* Paranoid! Optimizes out when all is well */
  278. if (sizeof(struct buffer_page) > sizeof(struct page))
  279. ring_buffer_page_too_big();
  280. /* keep it in its own cache line */
  281. buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
  282. GFP_KERNEL);
  283. if (!buffer)
  284. return NULL;
  285. buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
  286. buffer->flags = flags;
  287. /* need at least two pages */
  288. if (buffer->pages == 1)
  289. buffer->pages++;
  290. buffer->cpumask = cpu_possible_map;
  291. buffer->cpus = nr_cpu_ids;
  292. bsize = sizeof(void *) * nr_cpu_ids;
  293. buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
  294. GFP_KERNEL);
  295. if (!buffer->buffers)
  296. goto fail_free_buffer;
  297. for_each_buffer_cpu(buffer, cpu) {
  298. buffer->buffers[cpu] =
  299. rb_allocate_cpu_buffer(buffer, cpu);
  300. if (!buffer->buffers[cpu])
  301. goto fail_free_buffers;
  302. }
  303. mutex_init(&buffer->mutex);
  304. return buffer;
  305. fail_free_buffers:
  306. for_each_buffer_cpu(buffer, cpu) {
  307. if (buffer->buffers[cpu])
  308. rb_free_cpu_buffer(buffer->buffers[cpu]);
  309. }
  310. kfree(buffer->buffers);
  311. fail_free_buffer:
  312. kfree(buffer);
  313. return NULL;
  314. }
  315. /**
  316. * ring_buffer_free - free a ring buffer.
  317. * @buffer: the buffer to free.
  318. */
  319. void
  320. ring_buffer_free(struct ring_buffer *buffer)
  321. {
  322. int cpu;
  323. for_each_buffer_cpu(buffer, cpu)
  324. rb_free_cpu_buffer(buffer->buffers[cpu]);
  325. kfree(buffer);
  326. }
  327. static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
  328. static void
  329. rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
  330. {
  331. struct buffer_page *page;
  332. struct list_head *p;
  333. unsigned i;
  334. atomic_inc(&cpu_buffer->record_disabled);
  335. synchronize_sched();
  336. for (i = 0; i < nr_pages; i++) {
  337. BUG_ON(list_empty(&cpu_buffer->pages));
  338. p = cpu_buffer->pages.next;
  339. page = list_entry(p, struct buffer_page, list);
  340. list_del_init(&page->list);
  341. free_buffer_page(page);
  342. }
  343. BUG_ON(list_empty(&cpu_buffer->pages));
  344. rb_reset_cpu(cpu_buffer);
  345. rb_check_pages(cpu_buffer);
  346. atomic_dec(&cpu_buffer->record_disabled);
  347. }
  348. static void
  349. rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
  350. struct list_head *pages, unsigned nr_pages)
  351. {
  352. struct buffer_page *page;
  353. struct list_head *p;
  354. unsigned i;
  355. atomic_inc(&cpu_buffer->record_disabled);
  356. synchronize_sched();
  357. for (i = 0; i < nr_pages; i++) {
  358. BUG_ON(list_empty(pages));
  359. p = pages->next;
  360. page = list_entry(p, struct buffer_page, list);
  361. list_del_init(&page->list);
  362. list_add_tail(&page->list, &cpu_buffer->pages);
  363. }
  364. rb_reset_cpu(cpu_buffer);
  365. rb_check_pages(cpu_buffer);
  366. atomic_dec(&cpu_buffer->record_disabled);
  367. }
  368. /**
  369. * ring_buffer_resize - resize the ring buffer
  370. * @buffer: the buffer to resize.
  371. * @size: the new size.
  372. *
  373. * The tracer is responsible for making sure that the buffer is
  374. * not being used while changing the size.
  375. * Note: We may be able to change the above requirement by using
  376. * RCU synchronizations.
  377. *
  378. * Minimum size is 2 * BUF_PAGE_SIZE.
  379. *
  380. * Returns -1 on failure.
  381. */
  382. int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
  383. {
  384. struct ring_buffer_per_cpu *cpu_buffer;
  385. unsigned nr_pages, rm_pages, new_pages;
  386. struct buffer_page *page, *tmp;
  387. unsigned long buffer_size;
  388. unsigned long addr;
  389. LIST_HEAD(pages);
  390. int i, cpu;
  391. size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
  392. size *= BUF_PAGE_SIZE;
  393. buffer_size = buffer->pages * BUF_PAGE_SIZE;
  394. /* we need a minimum of two pages */
  395. if (size < BUF_PAGE_SIZE * 2)
  396. size = BUF_PAGE_SIZE * 2;
  397. if (size == buffer_size)
  398. return size;
  399. mutex_lock(&buffer->mutex);
  400. nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
  401. if (size < buffer_size) {
  402. /* easy case, just free pages */
  403. BUG_ON(nr_pages >= buffer->pages);
  404. rm_pages = buffer->pages - nr_pages;
  405. for_each_buffer_cpu(buffer, cpu) {
  406. cpu_buffer = buffer->buffers[cpu];
  407. rb_remove_pages(cpu_buffer, rm_pages);
  408. }
  409. goto out;
  410. }
  411. /*
  412. * This is a bit more difficult. We only want to add pages
  413. * when we can allocate enough for all CPUs. We do this
  414. * by allocating all the pages and storing them on a local
  415. * link list. If we succeed in our allocation, then we
  416. * add these pages to the cpu_buffers. Otherwise we just free
  417. * them all and return -ENOMEM;
  418. */
  419. BUG_ON(nr_pages <= buffer->pages);
  420. new_pages = nr_pages - buffer->pages;
  421. for_each_buffer_cpu(buffer, cpu) {
  422. for (i = 0; i < new_pages; i++) {
  423. addr = __get_free_page(GFP_KERNEL);
  424. if (!addr)
  425. goto free_pages;
  426. page = (struct buffer_page *)virt_to_page(addr);
  427. list_add(&page->list, &pages);
  428. }
  429. }
  430. for_each_buffer_cpu(buffer, cpu) {
  431. cpu_buffer = buffer->buffers[cpu];
  432. rb_insert_pages(cpu_buffer, &pages, new_pages);
  433. }
  434. BUG_ON(!list_empty(&pages));
  435. out:
  436. buffer->pages = nr_pages;
  437. mutex_unlock(&buffer->mutex);
  438. return size;
  439. free_pages:
  440. list_for_each_entry_safe(page, tmp, &pages, list) {
  441. list_del_init(&page->list);
  442. free_buffer_page(page);
  443. }
  444. return -ENOMEM;
  445. }
  446. static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
  447. {
  448. return cpu_buffer->head_page == cpu_buffer->tail_page &&
  449. cpu_buffer->head == cpu_buffer->tail;
  450. }
  451. static inline int rb_null_event(struct ring_buffer_event *event)
  452. {
  453. return event->type == RINGBUF_TYPE_PADDING;
  454. }
  455. static inline void *rb_page_index(struct buffer_page *page, unsigned index)
  456. {
  457. void *addr = page_address(&page->page);
  458. return addr + index;
  459. }
  460. static inline struct ring_buffer_event *
  461. rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
  462. {
  463. return rb_page_index(cpu_buffer->head_page,
  464. cpu_buffer->head);
  465. }
  466. static inline struct ring_buffer_event *
  467. rb_iter_head_event(struct ring_buffer_iter *iter)
  468. {
  469. return rb_page_index(iter->head_page,
  470. iter->head);
  471. }
  472. /*
  473. * When the tail hits the head and the buffer is in overwrite mode,
  474. * the head jumps to the next page and all content on the previous
  475. * page is discarded. But before doing so, we update the overrun
  476. * variable of the buffer.
  477. */
  478. static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
  479. {
  480. struct ring_buffer_event *event;
  481. unsigned long head;
  482. for (head = 0; head < rb_head_size(cpu_buffer);
  483. head += rb_event_length(event)) {
  484. event = rb_page_index(cpu_buffer->head_page, head);
  485. BUG_ON(rb_null_event(event));
  486. /* Only count data entries */
  487. if (event->type != RINGBUF_TYPE_DATA)
  488. continue;
  489. cpu_buffer->overrun++;
  490. cpu_buffer->entries--;
  491. }
  492. }
  493. static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
  494. struct buffer_page **page)
  495. {
  496. struct list_head *p = (*page)->list.next;
  497. if (p == &cpu_buffer->pages)
  498. p = p->next;
  499. *page = list_entry(p, struct buffer_page, list);
  500. }
  501. static inline void
  502. rb_add_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
  503. {
  504. cpu_buffer->tail_page->time_stamp = *ts;
  505. cpu_buffer->write_stamp = *ts;
  506. }
  507. static void rb_reset_read_page(struct ring_buffer_per_cpu *cpu_buffer)
  508. {
  509. cpu_buffer->read_stamp = cpu_buffer->head_page->time_stamp;
  510. cpu_buffer->head = 0;
  511. }
  512. static void
  513. rb_reset_iter_read_page(struct ring_buffer_iter *iter)
  514. {
  515. iter->read_stamp = iter->head_page->time_stamp;
  516. iter->head = 0;
  517. }
  518. /**
  519. * ring_buffer_update_event - update event type and data
  520. * @event: the even to update
  521. * @type: the type of event
  522. * @length: the size of the event field in the ring buffer
  523. *
  524. * Update the type and data fields of the event. The length
  525. * is the actual size that is written to the ring buffer,
  526. * and with this, we can determine what to place into the
  527. * data field.
  528. */
  529. static inline void
  530. rb_update_event(struct ring_buffer_event *event,
  531. unsigned type, unsigned length)
  532. {
  533. event->type = type;
  534. switch (type) {
  535. case RINGBUF_TYPE_PADDING:
  536. break;
  537. case RINGBUF_TYPE_TIME_EXTEND:
  538. event->len =
  539. (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
  540. >> RB_ALIGNMENT_SHIFT;
  541. break;
  542. case RINGBUF_TYPE_TIME_STAMP:
  543. event->len =
  544. (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
  545. >> RB_ALIGNMENT_SHIFT;
  546. break;
  547. case RINGBUF_TYPE_DATA:
  548. length -= RB_EVNT_HDR_SIZE;
  549. if (length > RB_MAX_SMALL_DATA) {
  550. event->len = 0;
  551. event->array[0] = length;
  552. } else
  553. event->len =
  554. (length + (RB_ALIGNMENT-1))
  555. >> RB_ALIGNMENT_SHIFT;
  556. break;
  557. default:
  558. BUG();
  559. }
  560. }
  561. static inline unsigned rb_calculate_event_length(unsigned length)
  562. {
  563. struct ring_buffer_event event; /* Used only for sizeof array */
  564. /* zero length can cause confusions */
  565. if (!length)
  566. length = 1;
  567. if (length > RB_MAX_SMALL_DATA)
  568. length += sizeof(event.array[0]);
  569. length += RB_EVNT_HDR_SIZE;
  570. length = ALIGN(length, RB_ALIGNMENT);
  571. return length;
  572. }
/*
 * __rb_reserve_next - find room for an event at the write position
 * @cpu_buffer: the per CPU buffer to write to
 * @type: event type to stamp into the reserved event
 * @length: total event length in bytes, header included
 * @ts: time stamp to record if the write moves on to a new page
 *
 * If the event does not fit in the rest of the tail page, the tail
 * moves to the start of the next page.  When that next page is the head
 * page the buffer is full: without RB_FL_OVERWRITE this fails, with it
 * the head page's events are counted as overrun and the head moves on
 * by one page.
 *
 * The tail offset is not advanced past the new event here;
 * rb_commit() and rb_add_time_stamp() do that.
 *
 * Returns the reserved event, or NULL if the buffer is full and
 * overwrite mode is off.
 */
static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
		  unsigned type, unsigned long length, u64 *ts)
{
	struct buffer_page *head_page, *tail_page;
	unsigned long tail;
	struct ring_buffer *buffer = cpu_buffer->buffer;
	struct ring_buffer_event *event;

	tail_page = cpu_buffer->tail_page;
	head_page = cpu_buffer->head_page;
	tail = cpu_buffer->tail;

	/* Does not fit in what is left of this page: go to the next one */
	if (tail + length > BUF_PAGE_SIZE) {
		struct buffer_page *next_page = tail_page;

		rb_inc_page(cpu_buffer, &next_page);

		/* The writer caught up with the reader */
		if (next_page == head_page) {
			if (!(buffer->flags & RB_FL_OVERWRITE))
				return NULL;

			/* count overflows */
			rb_update_overflow(cpu_buffer);

			/* Push the reader on to the page after the old head */
			rb_inc_page(cpu_buffer, &head_page);
			cpu_buffer->head_page = head_page;
			rb_reset_read_page(cpu_buffer);
		}

		/* Mark the unused rest of the old page, if there is any */
		if (tail != BUF_PAGE_SIZE) {
			event = rb_page_index(tail_page, tail);
			/* page padding */
			event->type = RINGBUF_TYPE_PADDING;
		}

		/* Close the old page and start the new one at offset 0 */
		tail_page->size = tail;
		tail_page = next_page;
		tail_page->size = 0;
		tail = 0;
		cpu_buffer->tail_page = tail_page;
		cpu_buffer->tail = tail;
		rb_add_stamp(cpu_buffer, ts);
	}

	/* An event longer than a whole page can never be stored */
	BUG_ON(tail + length > BUF_PAGE_SIZE);

	event = rb_page_index(tail_page, tail);
	rb_update_event(event, type, length);

	return event;
}
  614. static int
  615. rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
  616. u64 *ts, u64 *delta)
  617. {
  618. struct ring_buffer_event *event;
  619. static int once;
  620. if (unlikely(*delta > (1ULL << 59) && !once++)) {
  621. printk(KERN_WARNING "Delta way too big! %llu"
  622. " ts=%llu write stamp = %llu\n",
  623. *delta, *ts, cpu_buffer->write_stamp);
  624. WARN_ON(1);
  625. }
  626. /*
  627. * The delta is too big, we to add a
  628. * new timestamp.
  629. */
  630. event = __rb_reserve_next(cpu_buffer,
  631. RINGBUF_TYPE_TIME_EXTEND,
  632. RB_LEN_TIME_EXTEND,
  633. ts);
  634. if (!event)
  635. return -1;
  636. /* check to see if we went to the next page */
  637. if (cpu_buffer->tail) {
  638. /* Still on same page, update timestamp */
  639. event->time_delta = *delta & TS_MASK;
  640. event->array[0] = *delta >> TS_SHIFT;
  641. /* commit the time event */
  642. cpu_buffer->tail +=
  643. rb_event_length(event);
  644. cpu_buffer->write_stamp = *ts;
  645. *delta = 0;
  646. }
  647. return 0;
  648. }
  649. static struct ring_buffer_event *
  650. rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
  651. unsigned type, unsigned long length)
  652. {
  653. struct ring_buffer_event *event;
  654. u64 ts, delta;
  655. ts = ring_buffer_time_stamp(cpu_buffer->cpu);
  656. if (cpu_buffer->tail) {
  657. delta = ts - cpu_buffer->write_stamp;
  658. if (test_time_stamp(delta)) {
  659. int ret;
  660. ret = rb_add_time_stamp(cpu_buffer, &ts, &delta);
  661. if (ret < 0)
  662. return NULL;
  663. }
  664. } else {
  665. rb_add_stamp(cpu_buffer, &ts);
  666. delta = 0;
  667. }
  668. event = __rb_reserve_next(cpu_buffer, type, length, &ts);
  669. if (!event)
  670. return NULL;
  671. /* If the reserve went to the next page, our delta is zero */
  672. if (!cpu_buffer->tail)
  673. delta = 0;
  674. event->time_delta = delta;
  675. return event;
  676. }
  677. /**
  678. * ring_buffer_lock_reserve - reserve a part of the buffer
  679. * @buffer: the ring buffer to reserve from
  680. * @length: the length of the data to reserve (excluding event header)
  681. * @flags: a pointer to save the interrupt flags
  682. *
  683. * Returns a reseverd event on the ring buffer to copy directly to.
  684. * The user of this interface will need to get the body to write into
  685. * and can use the ring_buffer_event_data() interface.
  686. *
  687. * The length is the length of the data needed, not the event length
  688. * which also includes the event header.
  689. *
  690. * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
  691. * If NULL is returned, then nothing has been allocated or locked.
  692. */
  693. struct ring_buffer_event *
  694. ring_buffer_lock_reserve(struct ring_buffer *buffer,
  695. unsigned long length,
  696. unsigned long *flags)
  697. {
  698. struct ring_buffer_per_cpu *cpu_buffer;
  699. struct ring_buffer_event *event;
  700. int cpu;
  701. if (atomic_read(&buffer->record_disabled))
  702. return NULL;
  703. local_irq_save(*flags);
  704. cpu = raw_smp_processor_id();
  705. if (!cpu_isset(cpu, buffer->cpumask))
  706. goto out_irq;
  707. cpu_buffer = buffer->buffers[cpu];
  708. spin_lock(&cpu_buffer->lock);
  709. if (atomic_read(&cpu_buffer->record_disabled))
  710. goto no_record;
  711. length = rb_calculate_event_length(length);
  712. if (length > BUF_PAGE_SIZE)
  713. return NULL;
  714. event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
  715. if (!event)
  716. goto no_record;
  717. return event;
  718. no_record:
  719. spin_unlock(&cpu_buffer->lock);
  720. out_irq:
  721. local_irq_restore(*flags);
  722. return NULL;
  723. }
  724. static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
  725. struct ring_buffer_event *event)
  726. {
  727. cpu_buffer->tail += rb_event_length(event);
  728. cpu_buffer->tail_page->size = cpu_buffer->tail;
  729. cpu_buffer->write_stamp += event->time_delta;
  730. cpu_buffer->entries++;
  731. }
  732. /**
  733. * ring_buffer_unlock_commit - commit a reserved
  734. * @buffer: The buffer to commit to
  735. * @event: The event pointer to commit.
  736. * @flags: the interrupt flags received from ring_buffer_lock_reserve.
  737. *
  738. * This commits the data to the ring buffer, and releases any locks held.
  739. *
  740. * Must be paired with ring_buffer_lock_reserve.
  741. */
  742. int ring_buffer_unlock_commit(struct ring_buffer *buffer,
  743. struct ring_buffer_event *event,
  744. unsigned long flags)
  745. {
  746. struct ring_buffer_per_cpu *cpu_buffer;
  747. int cpu = raw_smp_processor_id();
  748. cpu_buffer = buffer->buffers[cpu];
  749. assert_spin_locked(&cpu_buffer->lock);
  750. rb_commit(cpu_buffer, event);
  751. spin_unlock(&cpu_buffer->lock);
  752. local_irq_restore(flags);
  753. return 0;
  754. }
  755. /**
  756. * ring_buffer_write - write data to the buffer without reserving
  757. * @buffer: The ring buffer to write to.
  758. * @length: The length of the data being written (excluding the event header)
  759. * @data: The data to write to the buffer.
  760. *
  761. * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
  762. * one function. If you already have the data to write to the buffer, it
  763. * may be easier to simply call this function.
  764. *
  765. * Note, like ring_buffer_lock_reserve, the length is the length of the data
  766. * and not the length of the event which would hold the header.
  767. */
  768. int ring_buffer_write(struct ring_buffer *buffer,
  769. unsigned long length,
  770. void *data)
  771. {
  772. struct ring_buffer_per_cpu *cpu_buffer;
  773. struct ring_buffer_event *event;
  774. unsigned long event_length, flags;
  775. void *body;
  776. int ret = -EBUSY;
  777. int cpu;
  778. if (atomic_read(&buffer->record_disabled))
  779. return -EBUSY;
  780. local_irq_save(flags);
  781. cpu = raw_smp_processor_id();
  782. if (!cpu_isset(cpu, buffer->cpumask))
  783. goto out_irq;
  784. cpu_buffer = buffer->buffers[cpu];
  785. spin_lock(&cpu_buffer->lock);
  786. if (atomic_read(&cpu_buffer->record_disabled))
  787. goto out;
  788. event_length = rb_calculate_event_length(length);
  789. event = rb_reserve_next_event(cpu_buffer,
  790. RINGBUF_TYPE_DATA, event_length);
  791. if (!event)
  792. goto out;
  793. body = rb_event_data(event);
  794. memcpy(body, data, length);
  795. rb_commit(cpu_buffer, event);
  796. ret = 0;
  797. out:
  798. spin_unlock(&cpu_buffer->lock);
  799. out_irq:
  800. local_irq_restore(flags);
  801. return ret;
  802. }
  803. /**
  804. * ring_buffer_lock - lock the ring buffer
  805. * @buffer: The ring buffer to lock
  806. * @flags: The place to store the interrupt flags
  807. *
  808. * This locks all the per CPU buffers.
  809. *
  810. * Must be unlocked by ring_buffer_unlock.
  811. */
  812. void ring_buffer_lock(struct ring_buffer *buffer, unsigned long *flags)
  813. {
  814. struct ring_buffer_per_cpu *cpu_buffer;
  815. int cpu;
  816. local_irq_save(*flags);
  817. for_each_buffer_cpu(buffer, cpu) {
  818. cpu_buffer = buffer->buffers[cpu];
  819. spin_lock(&cpu_buffer->lock);
  820. }
  821. }
  822. /**
  823. * ring_buffer_unlock - unlock a locked buffer
  824. * @buffer: The locked buffer to unlock
  825. * @flags: The interrupt flags received by ring_buffer_lock
  826. */
  827. void ring_buffer_unlock(struct ring_buffer *buffer, unsigned long flags)
  828. {
  829. struct ring_buffer_per_cpu *cpu_buffer;
  830. int cpu;
  831. for (cpu = buffer->cpus - 1; cpu >= 0; cpu--) {
  832. if (!cpu_isset(cpu, buffer->cpumask))
  833. continue;
  834. cpu_buffer = buffer->buffers[cpu];
  835. spin_unlock(&cpu_buffer->lock);
  836. }
  837. local_irq_restore(flags);
  838. }
  839. /**
  840. * ring_buffer_record_disable - stop all writes into the buffer
  841. * @buffer: The ring buffer to stop writes to.
  842. *
  843. * This prevents all writes to the buffer. Any attempt to write
  844. * to the buffer after this will fail and return NULL.
  845. *
  846. * The caller should call synchronize_sched() after this.
  847. */
  848. void ring_buffer_record_disable(struct ring_buffer *buffer)
  849. {
  850. atomic_inc(&buffer->record_disabled);
  851. }
  852. /**
  853. * ring_buffer_record_enable - enable writes to the buffer
  854. * @buffer: The ring buffer to enable writes
  855. *
  856. * Note, multiple disables will need the same number of enables
  857. * to truely enable the writing (much like preempt_disable).
  858. */
  859. void ring_buffer_record_enable(struct ring_buffer *buffer)
  860. {
  861. atomic_dec(&buffer->record_disabled);
  862. }
  863. /**
  864. * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
  865. * @buffer: The ring buffer to stop writes to.
  866. * @cpu: The CPU buffer to stop
  867. *
  868. * This prevents all writes to the buffer. Any attempt to write
  869. * to the buffer after this will fail and return NULL.
  870. *
  871. * The caller should call synchronize_sched() after this.
  872. */
  873. void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
  874. {
  875. struct ring_buffer_per_cpu *cpu_buffer;
  876. if (!cpu_isset(cpu, buffer->cpumask))
  877. return;
  878. cpu_buffer = buffer->buffers[cpu];
  879. atomic_inc(&cpu_buffer->record_disabled);
  880. }
  881. /**
  882. * ring_buffer_record_enable_cpu - enable writes to the buffer
  883. * @buffer: The ring buffer to enable writes
  884. * @cpu: The CPU to enable.
  885. *
  886. * Note, multiple disables will need the same number of enables
  887. * to truely enable the writing (much like preempt_disable).
  888. */
  889. void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
  890. {
  891. struct ring_buffer_per_cpu *cpu_buffer;
  892. if (!cpu_isset(cpu, buffer->cpumask))
  893. return;
  894. cpu_buffer = buffer->buffers[cpu];
  895. atomic_dec(&cpu_buffer->record_disabled);
  896. }
  897. /**
  898. * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
  899. * @buffer: The ring buffer
  900. * @cpu: The per CPU buffer to get the entries from.
  901. */
  902. unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
  903. {
  904. struct ring_buffer_per_cpu *cpu_buffer;
  905. if (!cpu_isset(cpu, buffer->cpumask))
  906. return 0;
  907. cpu_buffer = buffer->buffers[cpu];
  908. return cpu_buffer->entries;
  909. }
  910. /**
  911. * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
  912. * @buffer: The ring buffer
  913. * @cpu: The per CPU buffer to get the number of overruns from
  914. */
  915. unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
  916. {
  917. struct ring_buffer_per_cpu *cpu_buffer;
  918. if (!cpu_isset(cpu, buffer->cpumask))
  919. return 0;
  920. cpu_buffer = buffer->buffers[cpu];
  921. return cpu_buffer->overrun;
  922. }
  923. /**
  924. * ring_buffer_entries - get the number of entries in a buffer
  925. * @buffer: The ring buffer
  926. *
  927. * Returns the total number of entries in the ring buffer
  928. * (all CPU entries)
  929. */
  930. unsigned long ring_buffer_entries(struct ring_buffer *buffer)
  931. {
  932. struct ring_buffer_per_cpu *cpu_buffer;
  933. unsigned long entries = 0;
  934. int cpu;
  935. /* if you care about this being correct, lock the buffer */
  936. for_each_buffer_cpu(buffer, cpu) {
  937. cpu_buffer = buffer->buffers[cpu];
  938. entries += cpu_buffer->entries;
  939. }
  940. return entries;
  941. }
  942. /**
  943. * ring_buffer_overrun_cpu - get the number of overruns in buffer
  944. * @buffer: The ring buffer
  945. *
  946. * Returns the total number of overruns in the ring buffer
  947. * (all CPU entries)
  948. */
  949. unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
  950. {
  951. struct ring_buffer_per_cpu *cpu_buffer;
  952. unsigned long overruns = 0;
  953. int cpu;
  954. /* if you care about this being correct, lock the buffer */
  955. for_each_buffer_cpu(buffer, cpu) {
  956. cpu_buffer = buffer->buffers[cpu];
  957. overruns += cpu_buffer->overrun;
  958. }
  959. return overruns;
  960. }
  961. /**
  962. * ring_buffer_iter_reset - reset an iterator
  963. * @iter: The iterator to reset
  964. *
  965. * Resets the iterator, so that it will start from the beginning
  966. * again.
  967. */
  968. void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
  969. {
  970. struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
  971. iter->head_page = cpu_buffer->head_page;
  972. iter->head = cpu_buffer->head;
  973. rb_reset_iter_read_page(iter);
  974. }
  975. /**
  976. * ring_buffer_iter_empty - check if an iterator has no more to read
  977. * @iter: The iterator to check
  978. */
  979. int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
  980. {
  981. struct ring_buffer_per_cpu *cpu_buffer;
  982. cpu_buffer = iter->cpu_buffer;
  983. return iter->head_page == cpu_buffer->tail_page &&
  984. iter->head == cpu_buffer->tail;
  985. }
  986. static void
  987. rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
  988. struct ring_buffer_event *event)
  989. {
  990. u64 delta;
  991. switch (event->type) {
  992. case RINGBUF_TYPE_PADDING:
  993. return;
  994. case RINGBUF_TYPE_TIME_EXTEND:
  995. delta = event->array[0];
  996. delta <<= TS_SHIFT;
  997. delta += event->time_delta;
  998. cpu_buffer->read_stamp += delta;
  999. return;
  1000. case RINGBUF_TYPE_TIME_STAMP:
  1001. /* FIXME: not implemented */
  1002. return;
  1003. case RINGBUF_TYPE_DATA:
  1004. cpu_buffer->read_stamp += event->time_delta;
  1005. return;
  1006. default:
  1007. BUG();
  1008. }
  1009. return;
  1010. }
  1011. static void
  1012. rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
  1013. struct ring_buffer_event *event)
  1014. {
  1015. u64 delta;
  1016. switch (event->type) {
  1017. case RINGBUF_TYPE_PADDING:
  1018. return;
  1019. case RINGBUF_TYPE_TIME_EXTEND:
  1020. delta = event->array[0];
  1021. delta <<= TS_SHIFT;
  1022. delta += event->time_delta;
  1023. iter->read_stamp += delta;
  1024. return;
  1025. case RINGBUF_TYPE_TIME_STAMP:
  1026. /* FIXME: not implemented */
  1027. return;
  1028. case RINGBUF_TYPE_DATA:
  1029. iter->read_stamp += event->time_delta;
  1030. return;
  1031. default:
  1032. BUG();
  1033. }
  1034. return;
  1035. }
  1036. static void rb_advance_head(struct ring_buffer_per_cpu *cpu_buffer)
  1037. {
  1038. struct ring_buffer_event *event;
  1039. unsigned length;
  1040. /*
  1041. * Check if we are at the end of the buffer.
  1042. */
  1043. if (cpu_buffer->head >= cpu_buffer->head_page->size) {
  1044. BUG_ON(cpu_buffer->head_page == cpu_buffer->tail_page);
  1045. rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
  1046. rb_reset_read_page(cpu_buffer);
  1047. return;
  1048. }
  1049. event = rb_head_event(cpu_buffer);
  1050. if (event->type == RINGBUF_TYPE_DATA)
  1051. cpu_buffer->entries--;
  1052. length = rb_event_length(event);
  1053. /*
  1054. * This should not be called to advance the header if we are
  1055. * at the tail of the buffer.
  1056. */
  1057. BUG_ON((cpu_buffer->head_page == cpu_buffer->tail_page) &&
  1058. (cpu_buffer->head + length > cpu_buffer->tail));
  1059. rb_update_read_stamp(cpu_buffer, event);
  1060. cpu_buffer->head += length;
  1061. /* check for end of page */
  1062. if ((cpu_buffer->head >= cpu_buffer->head_page->size) &&
  1063. (cpu_buffer->head_page != cpu_buffer->tail_page))
  1064. rb_advance_head(cpu_buffer);
  1065. }
  1066. static void rb_advance_iter(struct ring_buffer_iter *iter)
  1067. {
  1068. struct ring_buffer *buffer;
  1069. struct ring_buffer_per_cpu *cpu_buffer;
  1070. struct ring_buffer_event *event;
  1071. unsigned length;
  1072. cpu_buffer = iter->cpu_buffer;
  1073. buffer = cpu_buffer->buffer;
  1074. /*
  1075. * Check if we are at the end of the buffer.
  1076. */
  1077. if (iter->head >= iter->head_page->size) {
  1078. BUG_ON(iter->head_page == cpu_buffer->tail_page);
  1079. rb_inc_page(cpu_buffer, &iter->head_page);
  1080. rb_reset_iter_read_page(iter);
  1081. return;
  1082. }
  1083. event = rb_iter_head_event(iter);
  1084. length = rb_event_length(event);
  1085. /*
  1086. * This should not be called to advance the header if we are
  1087. * at the tail of the buffer.
  1088. */
  1089. BUG_ON((iter->head_page == cpu_buffer->tail_page) &&
  1090. (iter->head + length > cpu_buffer->tail));
  1091. rb_update_iter_read_stamp(iter, event);
  1092. iter->head += length;
  1093. /* check for end of page padding */
  1094. if ((iter->head >= iter->head_page->size) &&
  1095. (iter->head_page != cpu_buffer->tail_page))
  1096. rb_advance_iter(iter);
  1097. }
  1098. /**
  1099. * ring_buffer_peek - peek at the next event to be read
  1100. * @buffer: The ring buffer to read
  1101. * @cpu: The cpu to peak at
  1102. * @ts: The timestamp counter of this event.
  1103. *
  1104. * This will return the event that will be read next, but does
  1105. * not consume the data.
  1106. */
  1107. struct ring_buffer_event *
  1108. ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
  1109. {
  1110. struct ring_buffer_per_cpu *cpu_buffer;
  1111. struct ring_buffer_event *event;
  1112. if (!cpu_isset(cpu, buffer->cpumask))
  1113. return NULL;
  1114. cpu_buffer = buffer->buffers[cpu];
  1115. again:
  1116. if (rb_per_cpu_empty(cpu_buffer))
  1117. return NULL;
  1118. event = rb_head_event(cpu_buffer);
  1119. switch (event->type) {
  1120. case RINGBUF_TYPE_PADDING:
  1121. rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
  1122. rb_reset_read_page(cpu_buffer);
  1123. goto again;
  1124. case RINGBUF_TYPE_TIME_EXTEND:
  1125. /* Internal data, OK to advance */
  1126. rb_advance_head(cpu_buffer);
  1127. goto again;
  1128. case RINGBUF_TYPE_TIME_STAMP:
  1129. /* FIXME: not implemented */
  1130. rb_advance_head(cpu_buffer);
  1131. goto again;
  1132. case RINGBUF_TYPE_DATA:
  1133. if (ts) {
  1134. *ts = cpu_buffer->read_stamp + event->time_delta;
  1135. ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
  1136. }
  1137. return event;
  1138. default:
  1139. BUG();
  1140. }
  1141. return NULL;
  1142. }
  1143. /**
  1144. * ring_buffer_iter_peek - peek at the next event to be read
  1145. * @iter: The ring buffer iterator
  1146. * @ts: The timestamp counter of this event.
  1147. *
  1148. * This will return the event that will be read next, but does
  1149. * not increment the iterator.
  1150. */
  1151. struct ring_buffer_event *
  1152. ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
  1153. {
  1154. struct ring_buffer *buffer;
  1155. struct ring_buffer_per_cpu *cpu_buffer;
  1156. struct ring_buffer_event *event;
  1157. if (ring_buffer_iter_empty(iter))
  1158. return NULL;
  1159. cpu_buffer = iter->cpu_buffer;
  1160. buffer = cpu_buffer->buffer;
  1161. again:
  1162. if (rb_per_cpu_empty(cpu_buffer))
  1163. return NULL;
  1164. event = rb_iter_head_event(iter);
  1165. switch (event->type) {
  1166. case RINGBUF_TYPE_PADDING:
  1167. rb_inc_page(cpu_buffer, &iter->head_page);
  1168. rb_reset_iter_read_page(iter);
  1169. goto again;
  1170. case RINGBUF_TYPE_TIME_EXTEND:
  1171. /* Internal data, OK to advance */
  1172. rb_advance_iter(iter);
  1173. goto again;
  1174. case RINGBUF_TYPE_TIME_STAMP:
  1175. /* FIXME: not implemented */
  1176. rb_advance_iter(iter);
  1177. goto again;
  1178. case RINGBUF_TYPE_DATA:
  1179. if (ts) {
  1180. *ts = iter->read_stamp + event->time_delta;
  1181. ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
  1182. }
  1183. return event;
  1184. default:
  1185. BUG();
  1186. }
  1187. return NULL;
  1188. }
  1189. /**
  1190. * ring_buffer_consume - return an event and consume it
  1191. * @buffer: The ring buffer to get the next event from
  1192. *
  1193. * Returns the next event in the ring buffer, and that event is consumed.
  1194. * Meaning, that sequential reads will keep returning a different event,
  1195. * and eventually empty the ring buffer if the producer is slower.
  1196. */
  1197. struct ring_buffer_event *
  1198. ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
  1199. {
  1200. struct ring_buffer_per_cpu *cpu_buffer;
  1201. struct ring_buffer_event *event;
  1202. if (!cpu_isset(cpu, buffer->cpumask))
  1203. return NULL;
  1204. event = ring_buffer_peek(buffer, cpu, ts);
  1205. if (!event)
  1206. return NULL;
  1207. cpu_buffer = buffer->buffers[cpu];
  1208. rb_advance_head(cpu_buffer);
  1209. return event;
  1210. }
  1211. /**
  1212. * ring_buffer_read_start - start a non consuming read of the buffer
  1213. * @buffer: The ring buffer to read from
  1214. * @cpu: The cpu buffer to iterate over
  1215. *
  1216. * This starts up an iteration through the buffer. It also disables
  1217. * the recording to the buffer until the reading is finished.
  1218. * This prevents the reading from being corrupted. This is not
  1219. * a consuming read, so a producer is not expected.
  1220. *
  1221. * Must be paired with ring_buffer_finish.
  1222. */
  1223. struct ring_buffer_iter *
  1224. ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
  1225. {
  1226. struct ring_buffer_per_cpu *cpu_buffer;
  1227. struct ring_buffer_iter *iter;
  1228. if (!cpu_isset(cpu, buffer->cpumask))
  1229. return NULL;
  1230. iter = kmalloc(sizeof(*iter), GFP_KERNEL);
  1231. if (!iter)
  1232. return NULL;
  1233. cpu_buffer = buffer->buffers[cpu];
  1234. iter->cpu_buffer = cpu_buffer;
  1235. atomic_inc(&cpu_buffer->record_disabled);
  1236. synchronize_sched();
  1237. spin_lock(&cpu_buffer->lock);
  1238. iter->head = cpu_buffer->head;
  1239. iter->head_page = cpu_buffer->head_page;
  1240. rb_reset_iter_read_page(iter);
  1241. spin_unlock(&cpu_buffer->lock);
  1242. return iter;
  1243. }
  1244. /**
  1245. * ring_buffer_finish - finish reading the iterator of the buffer
  1246. * @iter: The iterator retrieved by ring_buffer_start
  1247. *
  1248. * This re-enables the recording to the buffer, and frees the
  1249. * iterator.
  1250. */
  1251. void
  1252. ring_buffer_read_finish(struct ring_buffer_iter *iter)
  1253. {
  1254. struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
  1255. atomic_dec(&cpu_buffer->record_disabled);
  1256. kfree(iter);
  1257. }
  1258. /**
  1259. * ring_buffer_read - read the next item in the ring buffer by the iterator
  1260. * @iter: The ring buffer iterator
  1261. * @ts: The time stamp of the event read.
  1262. *
  1263. * This reads the next event in the ring buffer and increments the iterator.
  1264. */
  1265. struct ring_buffer_event *
  1266. ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
  1267. {
  1268. struct ring_buffer_event *event;
  1269. event = ring_buffer_iter_peek(iter, ts);
  1270. if (!event)
  1271. return NULL;
  1272. rb_advance_iter(iter);
  1273. return event;
  1274. }
  1275. /**
  1276. * ring_buffer_size - return the size of the ring buffer (in bytes)
  1277. * @buffer: The ring buffer.
  1278. */
  1279. unsigned long ring_buffer_size(struct ring_buffer *buffer)
  1280. {
  1281. return BUF_PAGE_SIZE * buffer->pages;
  1282. }
  1283. static void
  1284. rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
  1285. {
  1286. cpu_buffer->head_page
  1287. = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
  1288. cpu_buffer->tail_page
  1289. = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
  1290. cpu_buffer->head = cpu_buffer->tail = 0;
  1291. cpu_buffer->overrun = 0;
  1292. cpu_buffer->entries = 0;
  1293. }
  1294. /**
  1295. * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
  1296. * @buffer: The ring buffer to reset a per cpu buffer of
  1297. * @cpu: The CPU buffer to be reset
  1298. */
  1299. void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
  1300. {
  1301. struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
  1302. unsigned long flags;
  1303. if (!cpu_isset(cpu, buffer->cpumask))
  1304. return;
  1305. local_irq_save(flags);
  1306. spin_lock(&cpu_buffer->lock);
  1307. rb_reset_cpu(cpu_buffer);
  1308. spin_unlock(&cpu_buffer->lock);
  1309. local_irq_restore(flags);
  1310. }
  1311. /**
  1312. * ring_buffer_reset - reset a ring buffer
  1313. * @buffer: The ring buffer to reset all cpu buffers
  1314. */
  1315. void ring_buffer_reset(struct ring_buffer *buffer)
  1316. {
  1317. unsigned long flags;
  1318. int cpu;
  1319. ring_buffer_lock(buffer, &flags);
  1320. for_each_buffer_cpu(buffer, cpu)
  1321. rb_reset_cpu(buffer->buffers[cpu]);
  1322. ring_buffer_unlock(buffer, flags);
  1323. }
  1324. /**
  1325. * rind_buffer_empty - is the ring buffer empty?
  1326. * @buffer: The ring buffer to test
  1327. */
  1328. int ring_buffer_empty(struct ring_buffer *buffer)
  1329. {
  1330. struct ring_buffer_per_cpu *cpu_buffer;
  1331. int cpu;
  1332. /* yes this is racy, but if you don't like the race, lock the buffer */
  1333. for_each_buffer_cpu(buffer, cpu) {
  1334. cpu_buffer = buffer->buffers[cpu];
  1335. if (!rb_per_cpu_empty(cpu_buffer))
  1336. return 0;
  1337. }
  1338. return 1;
  1339. }
  1340. /**
  1341. * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
  1342. * @buffer: The ring buffer
  1343. * @cpu: The CPU buffer to test
  1344. */
  1345. int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
  1346. {
  1347. struct ring_buffer_per_cpu *cpu_buffer;
  1348. if (!cpu_isset(cpu, buffer->cpumask))
  1349. return 1;
  1350. cpu_buffer = buffer->buffers[cpu];
  1351. return rb_per_cpu_empty(cpu_buffer);
  1352. }
  1353. /**
  1354. * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
  1355. * @buffer_a: One buffer to swap with
  1356. * @buffer_b: The other buffer to swap with
  1357. *
  1358. * This function is useful for tracers that want to take a "snapshot"
  1359. * of a CPU buffer and has another back up buffer lying around.
  1360. * it is expected that the tracer handles the cpu buffer not being
  1361. * used at the moment.
  1362. */
  1363. int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
  1364. struct ring_buffer *buffer_b, int cpu)
  1365. {
  1366. struct ring_buffer_per_cpu *cpu_buffer_a;
  1367. struct ring_buffer_per_cpu *cpu_buffer_b;
  1368. if (!cpu_isset(cpu, buffer_a->cpumask) ||
  1369. !cpu_isset(cpu, buffer_b->cpumask))
  1370. return -EINVAL;
  1371. /* At least make sure the two buffers are somewhat the same */
  1372. if (buffer_a->size != buffer_b->size ||
  1373. buffer_a->pages != buffer_b->pages)
  1374. return -EINVAL;
  1375. cpu_buffer_a = buffer_a->buffers[cpu];
  1376. cpu_buffer_b = buffer_b->buffers[cpu];
  1377. /*
  1378. * We can't do a synchronize_sched here because this
  1379. * function can be called in atomic context.
  1380. * Normally this will be called from the same CPU as cpu.
  1381. * If not it's up to the caller to protect this.
  1382. */
  1383. atomic_inc(&cpu_buffer_a->record_disabled);
  1384. atomic_inc(&cpu_buffer_b->record_disabled);
  1385. buffer_a->buffers[cpu] = cpu_buffer_b;
  1386. buffer_b->buffers[cpu] = cpu_buffer_a;
  1387. cpu_buffer_b->buffer = buffer_a;
  1388. cpu_buffer_a->buffer = buffer_b;
  1389. atomic_dec(&cpu_buffer_a->record_disabled);
  1390. atomic_dec(&cpu_buffer_b->record_disabled);
  1391. return 0;
  1392. }