|
@@ -3628,22 +3628,24 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-static int mark_printk(const char *fmt, ...)
|
|
|
-{
|
|
|
- int ret;
|
|
|
- va_list args;
|
|
|
- va_start(args, fmt);
|
|
|
- ret = trace_vprintk(0, fmt, args);
|
|
|
- va_end(args);
|
|
|
- return ret;
|
|
|
-}
|
|
|
-
|
|
|
static ssize_t
|
|
|
tracing_mark_write(struct file *filp, const char __user *ubuf,
|
|
|
size_t cnt, loff_t *fpos)
|
|
|
{
|
|
|
- char *buf;
|
|
|
- size_t written;
|
|
|
+ unsigned long addr = (unsigned long)ubuf;
|
|
|
+ struct ring_buffer_event *event;
|
|
|
+ struct ring_buffer *buffer;
|
|
|
+ struct print_entry *entry;
|
|
|
+ unsigned long irq_flags;
|
|
|
+ struct page *pages[2];
|
|
|
+ int nr_pages = 1;
|
|
|
+ ssize_t written;
|
|
|
+ void *page1;
|
|
|
+ void *page2;
|
|
|
+ int offset;
|
|
|
+ int size;
|
|
|
+ int len;
|
|
|
+ int ret;
|
|
|
|
|
|
if (tracing_disabled)
|
|
|
return -EINVAL;
|
|
@@ -3651,28 +3653,81 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
|
|
|
if (cnt > TRACE_BUF_SIZE)
|
|
|
cnt = TRACE_BUF_SIZE;
|
|
|
|
|
|
- buf = kmalloc(cnt + 2, GFP_KERNEL);
|
|
|
- if (buf == NULL)
|
|
|
- return -ENOMEM;
|
|
|
+ /*
|
|
|
+ * Userspace is injecting traces into the kernel trace buffer.
|
|
|
+ * We want to be as non intrusive as possible.
|
|
|
+ * To do so, we do not want to allocate any special buffers
|
|
|
+ * or take any locks, but instead write the userspace data
|
|
|
+ * straight into the ring buffer.
|
|
|
+ *
|
|
|
+ * First we need to pin the userspace buffer into memory,
|
|
|
+ * which, most likely it is, because it just referenced it.
|
|
|
+ * But there's no guarantee that it is. By using get_user_pages_fast()
|
|
|
+ * and kmap_atomic/kunmap_atomic() we can get access to the
|
|
|
+ * pages directly. We then write the data directly into the
|
|
|
+ * ring buffer.
|
|
|
+ */
|
|
|
+ BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
|
|
|
|
|
|
- if (copy_from_user(buf, ubuf, cnt)) {
|
|
|
- kfree(buf);
|
|
|
- return -EFAULT;
|
|
|
+ /* check if we cross pages */
|
|
|
+ if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
|
|
|
+ nr_pages = 2;
|
|
|
+
|
|
|
+ offset = addr & (PAGE_SIZE - 1);
|
|
|
+ addr &= PAGE_MASK;
|
|
|
+
|
|
|
+ ret = get_user_pages_fast(addr, nr_pages, 0, pages);
|
|
|
+ if (ret < nr_pages) {
|
|
|
+ while (--ret >= 0)
|
|
|
+ put_page(pages[ret]);
|
|
|
+ written = -EFAULT;
|
|
|
+ goto out;
|
|
|
}
|
|
|
- if (buf[cnt-1] != '\n') {
|
|
|
- buf[cnt] = '\n';
|
|
|
- buf[cnt+1] = '\0';
|
|
|
+
|
|
|
+ page1 = kmap_atomic(pages[0]);
|
|
|
+ if (nr_pages == 2)
|
|
|
+ page2 = kmap_atomic(pages[1]);
|
|
|
+
|
|
|
+ local_save_flags(irq_flags);
|
|
|
+ size = sizeof(*entry) + cnt + 2; /* possible \n added */
|
|
|
+ buffer = global_trace.buffer;
|
|
|
+ event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
|
|
|
+ irq_flags, preempt_count());
|
|
|
+ if (!event) {
|
|
|
+ /* Ring buffer disabled, return as if not open for write */
|
|
|
+ written = -EBADF;
|
|
|
+ goto out_unlock;
|
|
|
+ }
|
|
|
+
|
|
|
+ entry = ring_buffer_event_data(event);
|
|
|
+ entry->ip = _THIS_IP_;
|
|
|
+
|
|
|
+ if (nr_pages == 2) {
|
|
|
+ len = PAGE_SIZE - offset;
|
|
|
+ memcpy(&entry->buf, page1 + offset, len);
|
|
|
+ memcpy(&entry->buf[len], page2, cnt - len);
|
|
|
} else
|
|
|
- buf[cnt] = '\0';
|
|
|
+ memcpy(&entry->buf, page1 + offset, cnt);
|
|
|
|
|
|
- written = mark_printk("%s", buf);
|
|
|
- kfree(buf);
|
|
|
- *fpos += written;
|
|
|
+ if (entry->buf[cnt - 1] != '\n') {
|
|
|
+ entry->buf[cnt] = '\n';
|
|
|
+ entry->buf[cnt + 1] = '\0';
|
|
|
+ } else
|
|
|
+ entry->buf[cnt] = '\0';
|
|
|
+
|
|
|
+ ring_buffer_unlock_commit(buffer, event);
|
|
|
|
|
|
- /* don't tell userspace we wrote more - it might confuse them */
|
|
|
- if (written > cnt)
|
|
|
- written = cnt;
|
|
|
+ written = cnt;
|
|
|
|
|
|
+ *fpos += written;
|
|
|
+
|
|
|
+ out_unlock:
|
|
|
+ if (nr_pages == 2)
|
|
|
+ kunmap_atomic(page2);
|
|
|
+ kunmap_atomic(page1);
|
|
|
+ while (nr_pages > 0)
|
|
|
+ put_page(pages[--nr_pages]);
|
|
|
+ out:
|
|
|
return written;
|
|
|
}
|
|
|
|