/*
 * Generic ring buffer
 *
 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
 */
#include <linux/ring_buffer.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/sched.h>	/* used for sched_clock() (for now) */
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/list.h>
#include <linux/fs.h>

/* Up this if you want to test the TIME_EXTENTS and normalization */
#define DEBUG_SHIFT 0

/* FIXME!!! */
u64 ring_buffer_time_stamp(int cpu)
{
	/* shift to debug/test normalization and TIME_EXTENTS */
	return sched_clock() << DEBUG_SHIFT;
}

void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
{
	/* Just stupid testing of the normalize function and deltas */
	*ts >>= DEBUG_SHIFT;
}

#define RB_EVNT_HDR_SIZE	(sizeof(struct ring_buffer_event))
#define RB_ALIGNMENT_SHIFT	2
#define RB_ALIGNMENT		(1 << RB_ALIGNMENT_SHIFT)
#define RB_MAX_SMALL_DATA	28

enum {
	RB_LEN_TIME_EXTEND = 8,
	RB_LEN_TIME_STAMP = 16,
};

/* inline for ring buffer fast paths */
static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{
	unsigned length;

	switch (event->type) {
	case RINGBUF_TYPE_PADDING:
		/* undefined */
		return -1;

	case RINGBUF_TYPE_TIME_EXTEND:
		return RB_LEN_TIME_EXTEND;

	case RINGBUF_TYPE_TIME_STAMP:
		return RB_LEN_TIME_STAMP;

	case RINGBUF_TYPE_DATA:
		if (event->len)
			length = event->len << RB_ALIGNMENT_SHIFT;
		else
			length = event->array[0];
		return length + RB_EVNT_HDR_SIZE;
	default:
		BUG();
	}
	/* not hit */
	return 0;
}

/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 */
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
	return rb_event_length(event);
}

/* inline for ring buffer fast paths */
static inline void *
rb_event_data(struct ring_buffer_event *event)
{
	BUG_ON(event->type != RINGBUF_TYPE_DATA);
	/* If length is in len field, then array[0] has the data */
	if (event->len)
		return (void *)&event->array[0];
	/* Otherwise length is in array[0] and array[1] has the data */
	return (void *)&event->array[1];
}

/**
 * ring_buffer_event_data - return the data of the event
 * @event: the event to get the data from
 */
void *ring_buffer_event_data(struct ring_buffer_event *event)
{
	return rb_event_data(event);
}

#define for_each_buffer_cpu(buffer, cpu)		\
	for_each_cpu_mask(cpu, buffer->cpumask)

#define TS_SHIFT	27
#define TS_MASK		((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST	(~TS_MASK)

/*
 * This hack stolen from mm/slob.c.
 * We can store per page timing information in the page frame of the page.
 * Thanks to Peter Zijlstra for suggesting this idea.
 */
struct buffer_page {
	u64		 time_stamp;	/* page time stamp */
	unsigned	 size;		/* size of page data */
	unsigned	 write;		/* index for next write */
	unsigned	 read;		/* index for next read */
	struct list_head list;		/* list of free pages */
	void		*page;		/* Actual data page */
};

/*
 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
 * this issue out.
 */
static inline void free_buffer_page(struct buffer_page *bpage)
{
	/* bpage->page holds the virtual address from __get_free_page() */
	if (bpage->page)
		free_page((unsigned long)bpage->page);
	kfree(bpage);
}

/*
 * We need to fit the time_stamp delta into 27 bits.
 */
static inline int test_time_stamp(u64 delta)
{
	if (delta & TS_DELTA_TEST)
		return 1;
	return 0;
}
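/*
 * For a sense of scale: if sched_clock() counts nanoseconds, as it
 * commonly does, a 27-bit delta covers roughly 2^27 ns, i.e. about
 * 134 ms.  A gap larger than that between two events forces a
 * TIME_EXTEND event to be inserted (see rb_add_time_stamp() below).
 */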
#define BUF_PAGE_SIZE PAGE_SIZE

/*
 * head_page == tail_page && head == tail then buffer is empty.
 */
struct ring_buffer_per_cpu {
	int			cpu;
	struct ring_buffer	*buffer;
	spinlock_t		lock;
	struct lock_class_key	lock_key;
	struct list_head	pages;
	struct buffer_page	*head_page;	/* read from head */
	struct buffer_page	*tail_page;	/* write to tail */
	struct buffer_page	*reader_page;
	unsigned long		overrun;
	unsigned long		entries;
	u64			write_stamp;
	u64			read_stamp;
	atomic_t		record_disabled;
};

struct ring_buffer {
	unsigned long		size;
	unsigned		pages;
	unsigned		flags;
	int			cpus;
	cpumask_t		cpumask;
	atomic_t		record_disabled;

	struct mutex		mutex;

	struct ring_buffer_per_cpu **buffers;
};

struct ring_buffer_iter {
	struct ring_buffer_per_cpu	*cpu_buffer;
	unsigned long			head;
	struct buffer_page		*head_page;
	u64				read_stamp;
};

#define RB_WARN_ON(buffer, cond)			\
	if (unlikely(cond)) {				\
		atomic_inc(&buffer->record_disabled);	\
		WARN_ON(1);				\
		return -1;				\
	}

/**
 * rb_check_pages - integrity check of buffer pages
 * @cpu_buffer: CPU buffer with pages to test
 *
 * As a safety measure we check to make sure the data pages have not
 * been corrupted.
 */
static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct list_head *head = &cpu_buffer->pages;
	struct buffer_page *page, *tmp;

	RB_WARN_ON(cpu_buffer, head->next->prev != head);
	RB_WARN_ON(cpu_buffer, head->prev->next != head);

	list_for_each_entry_safe(page, tmp, head, list) {
		RB_WARN_ON(cpu_buffer, page->list.next->prev != &page->list);
		RB_WARN_ON(cpu_buffer, page->list.prev->next != &page->list);
	}

	return 0;
}

static unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
{
	return cpu_buffer->head_page->size;
}

static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
			     unsigned nr_pages)
{
	struct list_head *head = &cpu_buffer->pages;
	struct buffer_page *page, *tmp;
	unsigned long addr;
	LIST_HEAD(pages);
	unsigned i;

	for (i = 0; i < nr_pages; i++) {
		page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
				    GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
		if (!page)
			goto free_pages;
		list_add(&page->list, &pages);

		addr = __get_free_page(GFP_KERNEL);
		if (!addr)
			goto free_pages;
		page->page = (void *)addr;
	}

	list_splice(&pages, head);

	rb_check_pages(cpu_buffer);

	return 0;

 free_pages:
	list_for_each_entry_safe(page, tmp, &pages, list) {
		list_del_init(&page->list);
		free_buffer_page(page);
	}
	return -ENOMEM;
}

static struct ring_buffer_per_cpu *
rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	struct buffer_page *page;
	unsigned long addr;
	int ret;

	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
				  GFP_KERNEL, cpu_to_node(cpu));
	if (!cpu_buffer)
		return NULL;

	cpu_buffer->cpu = cpu;
	cpu_buffer->buffer = buffer;
	spin_lock_init(&cpu_buffer->lock);
	INIT_LIST_HEAD(&cpu_buffer->pages);

	page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
			    GFP_KERNEL, cpu_to_node(cpu));
	if (!page)
		goto fail_free_buffer;

	cpu_buffer->reader_page = page;
	addr = __get_free_page(GFP_KERNEL);
	if (!addr)
		goto fail_free_reader;
	page->page = (void *)addr;

	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
	cpu_buffer->reader_page->size = 0;

	ret = rb_allocate_pages(cpu_buffer, buffer->pages);
	if (ret < 0)
		goto fail_free_reader;

	cpu_buffer->head_page
		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
	cpu_buffer->tail_page
		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);

	return cpu_buffer;

 fail_free_reader:
	free_buffer_page(cpu_buffer->reader_page);

 fail_free_buffer:
	kfree(cpu_buffer);
	return NULL;
}

static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct list_head *head = &cpu_buffer->pages;
	struct buffer_page *page, *tmp;

	list_del_init(&cpu_buffer->reader_page->list);
	free_buffer_page(cpu_buffer->reader_page);

	list_for_each_entry_safe(page, tmp, head, list) {
		list_del_init(&page->list);
		free_buffer_page(page);
	}
	kfree(cpu_buffer);
}

/*
 * Causes compile errors if the struct buffer_page gets bigger
 * than the struct page.
 */
extern int ring_buffer_page_too_big(void);

/**
 * ring_buffer_alloc - allocate a new ring_buffer
 * @size: the size in bytes that is needed.
 * @flags: attributes to set for the ring buffer.
 *
 * Currently the only flag that is available is the RB_FL_OVERWRITE
 * flag. This flag means that the buffer will overwrite old data
 * when the buffer wraps. If this flag is not set, the buffer will
 * drop data when the tail hits the head.
 */
struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
{
	struct ring_buffer *buffer;
	int bsize;
	int cpu;

	/* Paranoid! Optimizes out when all is well */
	if (sizeof(struct buffer_page) > sizeof(struct page))
		ring_buffer_page_too_big();

	/* keep it in its own cache line */
	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
			 GFP_KERNEL);
	if (!buffer)
		return NULL;

	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
	buffer->flags = flags;

	/* need at least two pages */
	if (buffer->pages == 1)
		buffer->pages++;

	buffer->cpumask = cpu_possible_map;
	buffer->cpus = nr_cpu_ids;

	bsize = sizeof(void *) * nr_cpu_ids;
	buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
				  GFP_KERNEL);
	if (!buffer->buffers)
		goto fail_free_buffer;

	for_each_buffer_cpu(buffer, cpu) {
		buffer->buffers[cpu] =
			rb_allocate_cpu_buffer(buffer, cpu);
		if (!buffer->buffers[cpu])
			goto fail_free_buffers;
	}

	mutex_init(&buffer->mutex);

	return buffer;

 fail_free_buffers:
	for_each_buffer_cpu(buffer, cpu) {
		if (buffer->buffers[cpu])
			rb_free_cpu_buffer(buffer->buffers[cpu]);
	}
	kfree(buffer->buffers);

 fail_free_buffer:
	kfree(buffer);
	return NULL;
}
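/*
 * Example (sketch): a tracer might set up and tear down a 1 MB
 * overwriting buffer roughly like this:
 *
 *	struct ring_buffer *buffer;
 *
 *	buffer = ring_buffer_alloc(1024 * 1024, RB_FL_OVERWRITE);
 *	if (!buffer)
 *		return -ENOMEM;
 *	(record, read, ...)
 *	ring_buffer_free(buffer);
 */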
/**
 * ring_buffer_free - free a ring buffer.
 * @buffer: the buffer to free.
 */
void
ring_buffer_free(struct ring_buffer *buffer)
{
	int cpu;

	for_each_buffer_cpu(buffer, cpu)
		rb_free_cpu_buffer(buffer->buffers[cpu]);

	kfree(buffer);
}

static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);

static void
rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
{
	struct buffer_page *page;
	struct list_head *p;
	unsigned i;

	atomic_inc(&cpu_buffer->record_disabled);
	synchronize_sched();

	for (i = 0; i < nr_pages; i++) {
		BUG_ON(list_empty(&cpu_buffer->pages));
		p = cpu_buffer->pages.next;
		page = list_entry(p, struct buffer_page, list);
		list_del_init(&page->list);
		free_buffer_page(page);
	}
	BUG_ON(list_empty(&cpu_buffer->pages));

	rb_reset_cpu(cpu_buffer);

	rb_check_pages(cpu_buffer);

	atomic_dec(&cpu_buffer->record_disabled);
}

static void
rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
		struct list_head *pages, unsigned nr_pages)
{
	struct buffer_page *page;
	struct list_head *p;
	unsigned i;

	atomic_inc(&cpu_buffer->record_disabled);
	synchronize_sched();

	for (i = 0; i < nr_pages; i++) {
		BUG_ON(list_empty(pages));
		p = pages->next;
		page = list_entry(p, struct buffer_page, list);
		list_del_init(&page->list);
		list_add_tail(&page->list, &cpu_buffer->pages);
	}
	rb_reset_cpu(cpu_buffer);

	rb_check_pages(cpu_buffer);

	atomic_dec(&cpu_buffer->record_disabled);
}

/**
 * ring_buffer_resize - resize the ring buffer
 * @buffer: the buffer to resize.
 * @size: the new size.
 *
 * The tracer is responsible for making sure that the buffer is
 * not being used while changing the size.
 * Note: We may be able to change the above requirement by using
 *  RCU synchronizations.
 *
 * Minimum size is 2 * BUF_PAGE_SIZE.
 *
 * Returns -1 on failure.
 */
int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	unsigned nr_pages, rm_pages, new_pages;
	struct buffer_page *page, *tmp;
	unsigned long buffer_size;
	unsigned long addr;
	LIST_HEAD(pages);
	int i, cpu;

	size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
	size *= BUF_PAGE_SIZE;
	buffer_size = buffer->pages * BUF_PAGE_SIZE;

	/* we need a minimum of two pages */
	if (size < BUF_PAGE_SIZE * 2)
		size = BUF_PAGE_SIZE * 2;

	if (size == buffer_size)
		return size;

	mutex_lock(&buffer->mutex);

	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);

	if (size < buffer_size) {

		/* easy case, just free pages */
		BUG_ON(nr_pages >= buffer->pages);

		rm_pages = buffer->pages - nr_pages;

		for_each_buffer_cpu(buffer, cpu) {
			cpu_buffer = buffer->buffers[cpu];
			rb_remove_pages(cpu_buffer, rm_pages);
		}
		goto out;
	}

	/*
	 * This is a bit more difficult. We only want to add pages
	 * when we can allocate enough for all CPUs. We do this
	 * by allocating all the pages and storing them on a local
	 * linked list. If we succeed in our allocation, then we
	 * add these pages to the cpu_buffers. Otherwise we just free
	 * them all and return -ENOMEM.
	 */
	BUG_ON(nr_pages <= buffer->pages);

	new_pages = nr_pages - buffer->pages;

	for_each_buffer_cpu(buffer, cpu) {
		for (i = 0; i < new_pages; i++) {
			page = kzalloc_node(ALIGN(sizeof(*page),
						  cache_line_size()),
					    GFP_KERNEL, cpu_to_node(cpu));
			if (!page)
				goto free_pages;
			list_add(&page->list, &pages);
			addr = __get_free_page(GFP_KERNEL);
			if (!addr)
				goto free_pages;
			page->page = (void *)addr;
		}
	}

	for_each_buffer_cpu(buffer, cpu) {
		cpu_buffer = buffer->buffers[cpu];
		rb_insert_pages(cpu_buffer, &pages, new_pages);
	}

	BUG_ON(!list_empty(&pages));

 out:
	buffer->pages = nr_pages;
	mutex_unlock(&buffer->mutex);

	return size;

 free_pages:
	list_for_each_entry_safe(page, tmp, &pages, list) {
		list_del_init(&page->list);
		free_buffer_page(page);
	}
	return -ENOMEM;
}

static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
{
	return cpu_buffer->reader_page->read == cpu_buffer->reader_page->size &&
		(cpu_buffer->tail_page == cpu_buffer->reader_page ||
		 (cpu_buffer->tail_page == cpu_buffer->head_page &&
		  cpu_buffer->head_page->read ==
		  cpu_buffer->tail_page->write));
}

static inline int rb_null_event(struct ring_buffer_event *event)
{
	return event->type == RINGBUF_TYPE_PADDING;
}

static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
{
	return page->page + index;
}

static inline struct ring_buffer_event *
rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
{
	return __rb_page_index(cpu_buffer->reader_page,
			       cpu_buffer->reader_page->read);
}

static inline struct ring_buffer_event *
rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
{
	return __rb_page_index(cpu_buffer->head_page,
			       cpu_buffer->head_page->read);
}

static inline struct ring_buffer_event *
rb_iter_head_event(struct ring_buffer_iter *iter)
{
	return __rb_page_index(iter->head_page, iter->head);
}

/*
 * When the tail hits the head and the buffer is in overwrite mode,
 * the head jumps to the next page and all content on the previous
 * page is discarded. But before doing so, we update the overrun
 * variable of the buffer.
 */
static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct ring_buffer_event *event;
	unsigned long head;

	for (head = 0; head < rb_head_size(cpu_buffer);
	     head += rb_event_length(event)) {

		event = __rb_page_index(cpu_buffer->head_page, head);
		BUG_ON(rb_null_event(event));
		/* Only count data entries */
		if (event->type != RINGBUF_TYPE_DATA)
			continue;
		cpu_buffer->overrun++;
		cpu_buffer->entries--;
	}
}

static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
			       struct buffer_page **page)
{
	struct list_head *p = (*page)->list.next;

	if (p == &cpu_buffer->pages)
		p = p->next;

	*page = list_entry(p, struct buffer_page, list);
}

static inline void
rb_add_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
{
	cpu_buffer->tail_page->time_stamp = *ts;
	cpu_buffer->write_stamp = *ts;
}

static void rb_reset_head_page(struct ring_buffer_per_cpu *cpu_buffer)
{
	cpu_buffer->head_page->read = 0;
}

static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{
	cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
	cpu_buffer->reader_page->read = 0;
}

static inline void rb_inc_iter(struct ring_buffer_iter *iter)
{
	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;

	/*
	 * The iterator could be on the reader page (it starts there).
	 * But the head could have moved, since the reader was
	 * found. Check for this case and assign the iterator
	 * to the head page instead of next.
	 */
	if (iter->head_page == cpu_buffer->reader_page)
		iter->head_page = cpu_buffer->head_page;
	else
		rb_inc_page(cpu_buffer, &iter->head_page);

	iter->read_stamp = iter->head_page->time_stamp;
	iter->head = 0;
}

/**
 * rb_update_event - update event type and data
 * @event: the event to update
 * @type: the type of event
 * @length: the size of the event field in the ring buffer
 *
 * Update the type and data fields of the event. The length
 * is the actual size that is written to the ring buffer,
 * and with this, we can determine what to place into the
 * data field.
 */
static inline void
rb_update_event(struct ring_buffer_event *event,
		unsigned type, unsigned length)
{
	event->type = type;

	switch (type) {

	case RINGBUF_TYPE_PADDING:
		break;

	case RINGBUF_TYPE_TIME_EXTEND:
		event->len =
			(RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
			>> RB_ALIGNMENT_SHIFT;
		break;

	case RINGBUF_TYPE_TIME_STAMP:
		event->len =
			(RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
			>> RB_ALIGNMENT_SHIFT;
		break;

	case RINGBUF_TYPE_DATA:
		length -= RB_EVNT_HDR_SIZE;
		if (length > RB_MAX_SMALL_DATA) {
			event->len = 0;
			event->array[0] = length;
		} else
			event->len =
				(length + (RB_ALIGNMENT-1))
				>> RB_ALIGNMENT_SHIFT;
		break;
	default:
		BUG();
	}
}

static inline unsigned rb_calculate_event_length(unsigned length)
{
	struct ring_buffer_event event; /* Used only for sizeof array */

	/* zero length can cause confusion */
	if (!length)
		length = 1;

	if (length > RB_MAX_SMALL_DATA)
		length += sizeof(event.array[0]);

	length += RB_EVNT_HDR_SIZE;
	length = ALIGN(length, RB_ALIGNMENT);

	return length;
}
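/*
 * Length encoding, by example: a small payload (at most RB_MAX_SMALL_DATA
 * bytes) is stored right after the header and its aligned size goes into
 * the 'len' field, so a request for N bytes occupies
 * ALIGN(N + RB_EVNT_HDR_SIZE, RB_ALIGNMENT) bytes.  A larger payload sets
 * 'len' to 0, puts the byte count in array[0] and the data at array[1],
 * which is why rb_calculate_event_length() adds sizeof(event.array[0])
 * in that case and why rb_event_data() returns &event->array[1] for it.
 */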
static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
		  unsigned type, unsigned long length, u64 *ts)
{
	struct buffer_page *tail_page, *head_page, *reader_page;
	unsigned long tail;
	struct ring_buffer *buffer = cpu_buffer->buffer;
	struct ring_buffer_event *event;

	tail_page = cpu_buffer->tail_page;
	tail = cpu_buffer->tail_page->write;

	if (tail + length > BUF_PAGE_SIZE) {
		struct buffer_page *next_page = tail_page;

		spin_lock(&cpu_buffer->lock);
		rb_inc_page(cpu_buffer, &next_page);

		head_page = cpu_buffer->head_page;
		reader_page = cpu_buffer->reader_page;

		/* we grabbed the lock before incrementing */
		WARN_ON(next_page == reader_page);

		if (next_page == head_page) {
			if (!(buffer->flags & RB_FL_OVERWRITE)) {
				spin_unlock(&cpu_buffer->lock);
				return NULL;
			}

			/* count overflows */
			rb_update_overflow(cpu_buffer);

			rb_inc_page(cpu_buffer, &head_page);
			cpu_buffer->head_page = head_page;
			rb_reset_head_page(cpu_buffer);
		}

		if (tail != BUF_PAGE_SIZE) {
			event = __rb_page_index(tail_page, tail);
			/* page padding */
			event->type = RINGBUF_TYPE_PADDING;
		}

		tail_page->size = tail;
		tail_page = next_page;
		tail_page->size = 0;
		tail = 0;
		cpu_buffer->tail_page = tail_page;
		cpu_buffer->tail_page->write = tail;
		rb_add_stamp(cpu_buffer, ts);
		spin_unlock(&cpu_buffer->lock);
	}

	BUG_ON(tail + length > BUF_PAGE_SIZE);

	event = __rb_page_index(tail_page, tail);
	rb_update_event(event, type, length);

	return event;
}

static int
rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
		  u64 *ts, u64 *delta)
{
	struct ring_buffer_event *event;
	static int once;

	if (unlikely(*delta > (1ULL << 59) && !once++)) {
		printk(KERN_WARNING "Delta way too big! %llu"
		       " ts=%llu write stamp = %llu\n",
		       *delta, *ts, cpu_buffer->write_stamp);
		WARN_ON(1);
	}

	/*
	 * The delta is too big, we need to add a
	 * new timestamp.
	 */
	event = __rb_reserve_next(cpu_buffer,
				  RINGBUF_TYPE_TIME_EXTEND,
				  RB_LEN_TIME_EXTEND,
				  ts);
	if (!event)
		return -1;

	/* check to see if we went to the next page */
	if (cpu_buffer->tail_page->write) {
		/* Still on same page, update timestamp */
		event->time_delta = *delta & TS_MASK;
		event->array[0] = *delta >> TS_SHIFT;
		/* commit the time event */
		cpu_buffer->tail_page->write +=
			rb_event_length(event);
		cpu_buffer->write_stamp = *ts;
		*delta = 0;
	}

	return 0;
}
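/*
 * The TIME_EXTEND event above stores the oversized delta in two pieces:
 * the low TS_SHIFT (27) bits go into the event's time_delta field and
 * the remaining upper bits into array[0].  A reader reassembles it as
 * (array[0] << TS_SHIFT) + time_delta, which is exactly what
 * rb_update_read_stamp() does further down.
 */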
static struct ring_buffer_event *
rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
		      unsigned type, unsigned long length)
{
	struct ring_buffer_event *event;
	u64 ts, delta;

	ts = ring_buffer_time_stamp(cpu_buffer->cpu);

	if (cpu_buffer->tail_page->write) {
		delta = ts - cpu_buffer->write_stamp;

		if (test_time_stamp(delta)) {
			int ret;

			ret = rb_add_time_stamp(cpu_buffer, &ts, &delta);
			if (ret < 0)
				return NULL;
		}
	} else {
		spin_lock(&cpu_buffer->lock);
		rb_add_stamp(cpu_buffer, &ts);
		spin_unlock(&cpu_buffer->lock);
		delta = 0;
	}

	event = __rb_reserve_next(cpu_buffer, type, length, &ts);
	if (!event)
		return NULL;

	/* If the reserve went to the next page, our delta is zero */
	if (!cpu_buffer->tail_page->write)
		delta = 0;

	event->time_delta = delta;

	return event;
}

/**
 * ring_buffer_lock_reserve - reserve a part of the buffer
 * @buffer: the ring buffer to reserve from
 * @length: the length of the data to reserve (excluding event header)
 * @flags: a pointer to save the interrupt flags
 *
 * Returns a reserved event on the ring buffer to copy directly to.
 * The user of this interface will need to get the body to write into
 * and can use the ring_buffer_event_data() interface.
 *
 * The length is the length of the data needed, not the event length
 * which also includes the event header.
 *
 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
 * If NULL is returned, then nothing has been allocated or locked.
 */
struct ring_buffer_event *
ring_buffer_lock_reserve(struct ring_buffer *buffer,
			 unsigned long length,
			 unsigned long *flags)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	struct ring_buffer_event *event;
	int cpu;

	if (atomic_read(&buffer->record_disabled))
		return NULL;

	local_irq_save(*flags);
	cpu = raw_smp_processor_id();

	if (!cpu_isset(cpu, buffer->cpumask))
		goto out;

	cpu_buffer = buffer->buffers[cpu];

	if (atomic_read(&cpu_buffer->record_disabled))
		goto out;

	length = rb_calculate_event_length(length);
	if (length > BUF_PAGE_SIZE)
		goto out;

	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
	if (!event)
		goto out;

	return event;

 out:
	local_irq_restore(*flags);
	return NULL;
}

static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
		      struct ring_buffer_event *event)
{
	cpu_buffer->tail_page->write += rb_event_length(event);
	cpu_buffer->tail_page->size = cpu_buffer->tail_page->write;
	cpu_buffer->write_stamp += event->time_delta;
	cpu_buffer->entries++;
}

/**
 * ring_buffer_unlock_commit - commit a reserved event
 * @buffer: The buffer to commit to
 * @event: The event pointer to commit.
 * @flags: the interrupt flags received from ring_buffer_lock_reserve.
 *
 * This commits the data to the ring buffer, and releases any locks held.
 *
 * Must be paired with ring_buffer_lock_reserve.
 */
int ring_buffer_unlock_commit(struct ring_buffer *buffer,
			      struct ring_buffer_event *event,
			      unsigned long flags)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	int cpu = raw_smp_processor_id();

	cpu_buffer = buffer->buffers[cpu];

	rb_commit(cpu_buffer, event);

	local_irq_restore(flags);

	return 0;
}
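/*
 * Example of the reserve/commit pair (sketch; 'struct my_entry' stands in
 * for whatever record a tracer actually writes):
 *
 *	struct ring_buffer_event *event;
 *	struct my_entry *entry;
 *	unsigned long flags;
 *
 *	event = ring_buffer_lock_reserve(buffer, sizeof(*entry), &flags);
 *	if (!event)
 *		return;		(buffer full or recording disabled)
 *	entry = ring_buffer_event_data(event);
 *	entry->value = 42;
 *	ring_buffer_unlock_commit(buffer, event, flags);
 */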
/**
 * ring_buffer_write - write data to the buffer without reserving
 * @buffer: The ring buffer to write to.
 * @length: The length of the data being written (excluding the event header)
 * @data: The data to write to the buffer.
 *
 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
 * one function. If you already have the data to write to the buffer, it
 * may be easier to simply call this function.
 *
 * Note, like ring_buffer_lock_reserve, the length is the length of the data
 * and not the length of the event which would hold the header.
 */
int ring_buffer_write(struct ring_buffer *buffer,
		      unsigned long length,
		      void *data)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	struct ring_buffer_event *event;
	unsigned long event_length, flags;
	void *body;
	int ret = -EBUSY;
	int cpu;

	if (atomic_read(&buffer->record_disabled))
		return -EBUSY;

	local_irq_save(flags);
	cpu = raw_smp_processor_id();

	if (!cpu_isset(cpu, buffer->cpumask))
		goto out;

	cpu_buffer = buffer->buffers[cpu];

	if (atomic_read(&cpu_buffer->record_disabled))
		goto out;

	event_length = rb_calculate_event_length(length);
	event = rb_reserve_next_event(cpu_buffer,
				      RINGBUF_TYPE_DATA, event_length);
	if (!event)
		goto out;

	body = rb_event_data(event);

	memcpy(body, data, length);

	rb_commit(cpu_buffer, event);

	ret = 0;
 out:
	local_irq_restore(flags);

	return ret;
}
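/*
 * Example (sketch): writing an already filled-in record in one call,
 * where 'struct my_record' and fill_my_record() are hypothetical:
 *
 *	struct my_record rec;
 *
 *	fill_my_record(&rec);
 *	if (ring_buffer_write(buffer, sizeof(rec), &rec))
 *		pr_debug("ring buffer busy, record dropped\n");
 */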
/**
 * ring_buffer_record_disable - stop all writes into the buffer
 * @buffer: The ring buffer to stop writes to.
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * The caller should call synchronize_sched() after this.
 */
void ring_buffer_record_disable(struct ring_buffer *buffer)
{
	atomic_inc(&buffer->record_disabled);
}

/**
 * ring_buffer_record_enable - enable writes to the buffer
 * @buffer: The ring buffer to enable writes
 *
 * Note, multiple disables will need the same number of enables
 * to truly enable the writing (much like preempt_disable).
 */
void ring_buffer_record_enable(struct ring_buffer *buffer)
{
	atomic_dec(&buffer->record_disabled);
}

/**
 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
 * @buffer: The ring buffer to stop writes to.
 * @cpu: The CPU buffer to stop
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * The caller should call synchronize_sched() after this.
 */
void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_per_cpu *cpu_buffer;

	if (!cpu_isset(cpu, buffer->cpumask))
		return;

	cpu_buffer = buffer->buffers[cpu];
	atomic_inc(&cpu_buffer->record_disabled);
}

/**
 * ring_buffer_record_enable_cpu - enable writes to the buffer
 * @buffer: The ring buffer to enable writes
 * @cpu: The CPU to enable.
 *
 * Note, multiple disables will need the same number of enables
 * to truly enable the writing (much like preempt_disable).
 */
void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_per_cpu *cpu_buffer;

	if (!cpu_isset(cpu, buffer->cpumask))
		return;

	cpu_buffer = buffer->buffers[cpu];
	atomic_dec(&cpu_buffer->record_disabled);
}

/**
 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the entries from.
 */
unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_per_cpu *cpu_buffer;

	if (!cpu_isset(cpu, buffer->cpumask))
		return 0;

	cpu_buffer = buffer->buffers[cpu];
	return cpu_buffer->entries;
}

/**
 * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of overruns from
 */
unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_per_cpu *cpu_buffer;

	if (!cpu_isset(cpu, buffer->cpumask))
		return 0;

	cpu_buffer = buffer->buffers[cpu];
	return cpu_buffer->overrun;
}

/**
 * ring_buffer_entries - get the number of entries in a buffer
 * @buffer: The ring buffer
 *
 * Returns the total number of entries in the ring buffer
 * (all CPU entries)
 */
unsigned long ring_buffer_entries(struct ring_buffer *buffer)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	unsigned long entries = 0;
	int cpu;

	/* if you care about this being correct, lock the buffer */
	for_each_buffer_cpu(buffer, cpu) {
		cpu_buffer = buffer->buffers[cpu];
		entries += cpu_buffer->entries;
	}

	return entries;
}

/**
 * ring_buffer_overruns - get the number of overruns in the buffer
 * @buffer: The ring buffer
 *
 * Returns the total number of overruns in the ring buffer
 * (all CPU entries)
 */
unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	unsigned long overruns = 0;
	int cpu;

	/* if you care about this being correct, lock the buffer */
	for_each_buffer_cpu(buffer, cpu) {
		cpu_buffer = buffer->buffers[cpu];
		overruns += cpu_buffer->overrun;
	}

	return overruns;
}

/**
 * ring_buffer_iter_reset - reset an iterator
 * @iter: The iterator to reset
 *
 * Resets the iterator, so that it will start from the beginning
 * again.
 */
void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
{
	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;

	/* Iterator usage is expected to have record disabled */
	if (list_empty(&cpu_buffer->reader_page->list)) {
		iter->head_page = cpu_buffer->head_page;
		iter->head = cpu_buffer->head_page->read;
	} else {
		iter->head_page = cpu_buffer->reader_page;
		iter->head = cpu_buffer->reader_page->read;
	}
	if (iter->head)
		iter->read_stamp = cpu_buffer->read_stamp;
	else
		iter->read_stamp = iter->head_page->time_stamp;
}

/**
 * ring_buffer_iter_empty - check if an iterator has no more to read
 * @iter: The iterator to check
 */
int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
{
	struct ring_buffer_per_cpu *cpu_buffer;

	cpu_buffer = iter->cpu_buffer;

	return iter->head_page == cpu_buffer->tail_page &&
		iter->head == cpu_buffer->tail_page->write;
}

static void
rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
		     struct ring_buffer_event *event)
{
	u64 delta;

	switch (event->type) {
	case RINGBUF_TYPE_PADDING:
		return;

	case RINGBUF_TYPE_TIME_EXTEND:
		delta = event->array[0];
		delta <<= TS_SHIFT;
		delta += event->time_delta;
		cpu_buffer->read_stamp += delta;
		return;

	case RINGBUF_TYPE_TIME_STAMP:
		/* FIXME: not implemented */
		return;

	case RINGBUF_TYPE_DATA:
		cpu_buffer->read_stamp += event->time_delta;
		return;

	default:
		BUG();
	}
	return;
}

static void
rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
			  struct ring_buffer_event *event)
{
	u64 delta;

	switch (event->type) {
	case RINGBUF_TYPE_PADDING:
		return;

	case RINGBUF_TYPE_TIME_EXTEND:
		delta = event->array[0];
		delta <<= TS_SHIFT;
		delta += event->time_delta;
		iter->read_stamp += delta;
		return;

	case RINGBUF_TYPE_TIME_STAMP:
		/* FIXME: not implemented */
		return;

	case RINGBUF_TYPE_DATA:
		iter->read_stamp += event->time_delta;
		return;

	default:
		BUG();
	}
	return;
}

static struct buffer_page *
rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct buffer_page *reader = NULL;
	unsigned long flags;

	spin_lock_irqsave(&cpu_buffer->lock, flags);

 again:
	reader = cpu_buffer->reader_page;

	/* If there's more to read, return this page */
	if (cpu_buffer->reader_page->read < reader->size)
		goto out;

	/* Never should we have an index greater than the size */
	WARN_ON(cpu_buffer->reader_page->read > reader->size);

	/* check if we caught up to the tail */
	reader = NULL;
	if (cpu_buffer->tail_page == cpu_buffer->reader_page)
		goto out;

	/*
	 * Splice the empty reader page into the list around the head.
	 * Reset the reader page to size zero.
	 */
	reader = cpu_buffer->head_page;
	cpu_buffer->reader_page->list.next = reader->list.next;
	cpu_buffer->reader_page->list.prev = reader->list.prev;
	cpu_buffer->reader_page->size = 0;

	/* Make the reader page now replace the head */
	reader->list.prev->next = &cpu_buffer->reader_page->list;
	reader->list.next->prev = &cpu_buffer->reader_page->list;

	/*
	 * If the tail is on the reader, then we must set the head
	 * to the inserted page, otherwise we set it one before.
	 */
	cpu_buffer->head_page = cpu_buffer->reader_page;

	if (cpu_buffer->tail_page != reader)
		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);

	/* Finally update the reader page to the new head */
	cpu_buffer->reader_page = reader;
	rb_reset_reader_page(cpu_buffer);

	goto again;

 out:
	spin_unlock_irqrestore(&cpu_buffer->lock, flags);

	return reader;
}

static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct ring_buffer_event *event;
	struct buffer_page *reader;
	unsigned length;

	reader = rb_get_reader_page(cpu_buffer);

	/* This function should not be called when buffer is empty */
	BUG_ON(!reader);

	event = rb_reader_event(cpu_buffer);

	if (event->type == RINGBUF_TYPE_DATA)
		cpu_buffer->entries--;

	rb_update_read_stamp(cpu_buffer, event);

	length = rb_event_length(event);
	cpu_buffer->reader_page->read += length;
}

static void rb_advance_iter(struct ring_buffer_iter *iter)
{
	struct ring_buffer *buffer;
	struct ring_buffer_per_cpu *cpu_buffer;
	struct ring_buffer_event *event;
	unsigned length;

	cpu_buffer = iter->cpu_buffer;
	buffer = cpu_buffer->buffer;

	/*
	 * Check if we are at the end of the buffer.
	 */
	if (iter->head >= iter->head_page->size) {
		BUG_ON(iter->head_page == cpu_buffer->tail_page);
		rb_inc_iter(iter);
		return;
	}

	event = rb_iter_head_event(iter);

	length = rb_event_length(event);

	/*
	 * This should not be called to advance the header if we are
	 * at the tail of the buffer.
	 */
	BUG_ON((iter->head_page == cpu_buffer->tail_page) &&
	       (iter->head + length > cpu_buffer->tail_page->write));

	rb_update_iter_read_stamp(iter, event);

	iter->head += length;

	/* check for end of page padding */
	if ((iter->head >= iter->head_page->size) &&
	    (iter->head_page != cpu_buffer->tail_page))
		rb_advance_iter(iter);
}

/**
 * ring_buffer_peek - peek at the next event to be read
 * @buffer: The ring buffer to read
 * @cpu: The cpu to peek at
 * @ts: The timestamp counter of this event.
 *
 * This will return the event that will be read next, but does
 * not consume the data.
 */
struct ring_buffer_event *
ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	struct ring_buffer_event *event;
	struct buffer_page *reader;

	if (!cpu_isset(cpu, buffer->cpumask))
		return NULL;

	cpu_buffer = buffer->buffers[cpu];

 again:
	reader = rb_get_reader_page(cpu_buffer);
	if (!reader)
		return NULL;

	event = rb_reader_event(cpu_buffer);

	switch (event->type) {
	case RINGBUF_TYPE_PADDING:
		WARN_ON(1);
		rb_advance_reader(cpu_buffer);
		return NULL;

	case RINGBUF_TYPE_TIME_EXTEND:
		/* Internal data, OK to advance */
		rb_advance_reader(cpu_buffer);
		goto again;

	case RINGBUF_TYPE_TIME_STAMP:
		/* FIXME: not implemented */
		rb_advance_reader(cpu_buffer);
		goto again;

	case RINGBUF_TYPE_DATA:
		if (ts) {
			*ts = cpu_buffer->read_stamp + event->time_delta;
			ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
		}
		return event;

	default:
		BUG();
	}

	return NULL;
}

/**
 * ring_buffer_iter_peek - peek at the next event to be read
 * @iter: The ring buffer iterator
 * @ts: The timestamp counter of this event.
 *
 * This will return the event that will be read next, but does
 * not increment the iterator.
 */
struct ring_buffer_event *
ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
{
	struct ring_buffer *buffer;
	struct ring_buffer_per_cpu *cpu_buffer;
	struct ring_buffer_event *event;

	if (ring_buffer_iter_empty(iter))
		return NULL;

	cpu_buffer = iter->cpu_buffer;
	buffer = cpu_buffer->buffer;

 again:
	if (rb_per_cpu_empty(cpu_buffer))
		return NULL;

	event = rb_iter_head_event(iter);

	switch (event->type) {
	case RINGBUF_TYPE_PADDING:
		rb_inc_iter(iter);
		goto again;

	case RINGBUF_TYPE_TIME_EXTEND:
		/* Internal data, OK to advance */
		rb_advance_iter(iter);
		goto again;

	case RINGBUF_TYPE_TIME_STAMP:
		/* FIXME: not implemented */
		rb_advance_iter(iter);
		goto again;

	case RINGBUF_TYPE_DATA:
		if (ts) {
			*ts = iter->read_stamp + event->time_delta;
			ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
		}
		return event;

	default:
		BUG();
	}

	return NULL;
}

/**
 * ring_buffer_consume - return an event and consume it
 * @buffer: The ring buffer to get the next event from
 * @cpu: The per CPU buffer to read from
 * @ts: The timestamp counter of this event.
 *
 * Returns the next event in the ring buffer, and that event is consumed.
 * Meaning, that sequential reads will keep returning a different event,
 * and eventually empty the ring buffer if the producer is slower.
 */
struct ring_buffer_event *
ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	struct ring_buffer_event *event;

	if (!cpu_isset(cpu, buffer->cpumask))
		return NULL;

	event = ring_buffer_peek(buffer, cpu, ts);
	if (!event)
		return NULL;

	cpu_buffer = buffer->buffers[cpu];
	rb_advance_reader(cpu_buffer);

	return event;
}
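/*
 * Example consumer loop (sketch): drain everything currently readable on
 * one CPU, where process_event() is a hypothetical callback:
 *
 *	struct ring_buffer_event *event;
 *	u64 ts;
 *
 *	while ((event = ring_buffer_consume(buffer, cpu, &ts)) != NULL)
 *		process_event(ring_buffer_event_data(event),
 *			      ring_buffer_event_length(event), ts);
 */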
/**
 * ring_buffer_read_start - start a non consuming read of the buffer
 * @buffer: The ring buffer to read from
 * @cpu: The cpu buffer to iterate over
 *
 * This starts up an iteration through the buffer. It also disables
 * the recording to the buffer until the reading is finished.
 * This prevents the reading from being corrupted. This is not
 * a consuming read, so a producer is not expected.
 *
 * Must be paired with ring_buffer_read_finish.
 */
struct ring_buffer_iter *
ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	struct ring_buffer_iter *iter;
	unsigned long flags;

	if (!cpu_isset(cpu, buffer->cpumask))
		return NULL;

	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		return NULL;

	cpu_buffer = buffer->buffers[cpu];

	iter->cpu_buffer = cpu_buffer;

	atomic_inc(&cpu_buffer->record_disabled);
	synchronize_sched();

	spin_lock_irqsave(&cpu_buffer->lock, flags);
	ring_buffer_iter_reset(iter);
	spin_unlock_irqrestore(&cpu_buffer->lock, flags);

	return iter;
}

/**
 * ring_buffer_read_finish - finish reading the iterator of the buffer
 * @iter: The iterator retrieved by ring_buffer_read_start
 *
 * This re-enables the recording to the buffer, and frees the
 * iterator.
 */
void
ring_buffer_read_finish(struct ring_buffer_iter *iter)
{
	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;

	atomic_dec(&cpu_buffer->record_disabled);
	kfree(iter);
}

/**
 * ring_buffer_read - read the next item in the ring buffer by the iterator
 * @iter: The ring buffer iterator
 * @ts: The time stamp of the event read.
 *
 * This reads the next event in the ring buffer and increments the iterator.
 */
struct ring_buffer_event *
ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
{
	struct ring_buffer_event *event;

	event = ring_buffer_iter_peek(iter, ts);
	if (!event)
		return NULL;

	rb_advance_iter(iter);

	return event;
}
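/*
 * Example of a non-consuming read (sketch; inspect_event() is a
 * hypothetical helper):
 *
 *	struct ring_buffer_iter *iter;
 *	struct ring_buffer_event *event;
 *	u64 ts;
 *
 *	iter = ring_buffer_read_start(buffer, cpu);
 *	if (!iter)
 *		return;
 *	while ((event = ring_buffer_read(iter, &ts)) != NULL)
 *		inspect_event(event, ts);
 *	ring_buffer_read_finish(iter);
 */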
/**
 * ring_buffer_size - return the size of the ring buffer (in bytes)
 * @buffer: The ring buffer.
 */
unsigned long ring_buffer_size(struct ring_buffer *buffer)
{
	return BUF_PAGE_SIZE * buffer->pages;
}

static void
rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
{
	cpu_buffer->head_page
		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
	cpu_buffer->head_page->size = 0;
	cpu_buffer->tail_page = cpu_buffer->head_page;
	cpu_buffer->tail_page->size = 0;
	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
	cpu_buffer->reader_page->size = 0;

	cpu_buffer->head_page->read = 0;
	cpu_buffer->tail_page->write = 0;
	cpu_buffer->reader_page->read = 0;

	cpu_buffer->overrun = 0;
	cpu_buffer->entries = 0;
}

/**
 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
 * @buffer: The ring buffer to reset a per cpu buffer of
 * @cpu: The CPU buffer to be reset
 */
void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
	unsigned long flags;

	if (!cpu_isset(cpu, buffer->cpumask))
		return;

	spin_lock_irqsave(&cpu_buffer->lock, flags);

	rb_reset_cpu(cpu_buffer);

	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
}

/**
 * ring_buffer_reset - reset a ring buffer
 * @buffer: The ring buffer to reset all cpu buffers
 */
void ring_buffer_reset(struct ring_buffer *buffer)
{
	int cpu;

	for_each_buffer_cpu(buffer, cpu)
		ring_buffer_reset_cpu(buffer, cpu);
}

/**
 * ring_buffer_empty - is the ring buffer empty?
 * @buffer: The ring buffer to test
 */
int ring_buffer_empty(struct ring_buffer *buffer)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	int cpu;

	/* yes this is racy, but if you don't like the race, lock the buffer */
	for_each_buffer_cpu(buffer, cpu) {
		cpu_buffer = buffer->buffers[cpu];
		if (!rb_per_cpu_empty(cpu_buffer))
			return 0;
	}
	return 1;
}

/**
 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
 * @buffer: The ring buffer
 * @cpu: The CPU buffer to test
 */
int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_per_cpu *cpu_buffer;

	if (!cpu_isset(cpu, buffer->cpumask))
		return 1;

	cpu_buffer = buffer->buffers[cpu];
	return rb_per_cpu_empty(cpu_buffer);
}

/**
 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
 * @buffer_a: One buffer to swap with
 * @buffer_b: The other buffer to swap with
 * @cpu: The CPU buffer to swap
 *
 * This function is useful for tracers that want to take a "snapshot"
 * of a CPU buffer and have another backup buffer lying around.
 * It is expected that the tracer handles the cpu buffer not being
 * used at the moment.
 */
int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
			 struct ring_buffer *buffer_b, int cpu)
{
	struct ring_buffer_per_cpu *cpu_buffer_a;
	struct ring_buffer_per_cpu *cpu_buffer_b;

	if (!cpu_isset(cpu, buffer_a->cpumask) ||
	    !cpu_isset(cpu, buffer_b->cpumask))
		return -EINVAL;

	/* At least make sure the two buffers are somewhat the same */
	if (buffer_a->size != buffer_b->size ||
	    buffer_a->pages != buffer_b->pages)
		return -EINVAL;

	cpu_buffer_a = buffer_a->buffers[cpu];
	cpu_buffer_b = buffer_b->buffers[cpu];

	/*
	 * We can't do a synchronize_sched here because this
	 * function can be called in atomic context.
	 * Normally this will be called from the same CPU as cpu.
	 * If not it's up to the caller to protect this.
	 */
	atomic_inc(&cpu_buffer_a->record_disabled);
	atomic_inc(&cpu_buffer_b->record_disabled);

	buffer_a->buffers[cpu] = cpu_buffer_b;
	buffer_b->buffers[cpu] = cpu_buffer_a;

	cpu_buffer_b->buffer = buffer_a;
	cpu_buffer_a->buffer = buffer_b;

	atomic_dec(&cpu_buffer_a->record_disabled);
	atomic_dec(&cpu_buffer_b->record_disabled);

	return 0;
}
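/*
 * Example (sketch): taking a "snapshot" of one CPU by swapping in a spare
 * buffer of the same size, then reading the swapped-out data at leisure:
 *
 *	if (ring_buffer_swap_cpu(main_buffer, spare_buffer, cpu) == 0) {
 *		(spare_buffer now holds what main_buffer had for cpu;
 *		 read it with ring_buffer_consume() or an iterator)
 *	}
 */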