@@ -35,7 +35,102 @@ static DEFINE_SPINLOCK(buffer_lock);
 static DEFINE_SPINLOCK(cache_lock);
 static int num_spu_nodes;
 int spu_prof_num_nodes;
-int last_guard_val[MAX_NUMNODES * 8];
+
+struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE];
+struct delayed_work spu_work;
+static unsigned max_spu_buff;
+
+static void spu_buff_add(unsigned long int value, int spu)
+{
+	/* spu buff is a circular buffer.  Add entries to the
+	 * head.  Head is the index to store the next value.
+	 * The buffer is full when there is one available entry
+	 * in the queue, i.e. head and tail can't be equal.
+	 * That way we can tell the difference between the
+	 * buffer being full versus empty.
+	 *
+	 * ASSUMPTION: the buffer_lock is held when this function
+	 *             is called to lock the buffer, head and tail.
+	 */
+	int full = 1;
+
+	if (spu_buff[spu].head >= spu_buff[spu].tail) {
+		if ((spu_buff[spu].head - spu_buff[spu].tail)
+		    < (max_spu_buff - 1))
+			full = 0;
+
+	} else if (spu_buff[spu].tail > spu_buff[spu].head) {
+		if ((spu_buff[spu].tail - spu_buff[spu].head)
+		    > 1)
+			full = 0;
+	}
+
+	if (!full) {
+		spu_buff[spu].buff[spu_buff[spu].head] = value;
+		spu_buff[spu].head++;
+
+		if (spu_buff[spu].head >= max_spu_buff)
+			spu_buff[spu].head = 0;
+	} else {
+		/* From the user's perspective make the SPU buffer
+		 * size management/overflow look like we are using
+		 * per cpu buffers.  The user uses the same
+		 * per cpu parameter to adjust the SPU buffer size.
+		 * Increment the sample_lost_overflow to inform
+		 * the user the buffer size needs to be increased.
+		 */
+		oprofile_cpu_buffer_inc_smpl_lost();
+	}
+}
+
+/* This function copies the per SPU buffers to the
+ * OProfile kernel buffer.
+ */
+void sync_spu_buff(void)
+{
+	int spu;
+	unsigned long flags;
+	int curr_head;
+
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		/* In case there was an issue and the buffer didn't
+		 * get created, skip it.
+		 */
+		if (spu_buff[spu].buff == NULL)
+			continue;
+
+		/* Hold the lock to make sure the head/tail
+		 * doesn't change while spu_buff_add() is
+		 * deciding if the buffer is full or not.
+		 * Being a little paranoid.
+		 */
+		spin_lock_irqsave(&buffer_lock, flags);
+		curr_head = spu_buff[spu].head;
+		spin_unlock_irqrestore(&buffer_lock, flags);
+
+		/* Transfer the current contents to the kernel buffer.
+		 * Data can still be added to the head of the buffer.
+		 */
+		oprofile_put_buff(spu_buff[spu].buff,
+				  spu_buff[spu].tail,
+				  curr_head, max_spu_buff);
+
+		spin_lock_irqsave(&buffer_lock, flags);
+		spu_buff[spu].tail = curr_head;
+		spin_unlock_irqrestore(&buffer_lock, flags);
+	}
+
+}
+
+static void wq_sync_spu_buff(struct work_struct *work)
+{
+	/* move data from spu buffers to kernel buffer */
+	sync_spu_buff();
+
+	/* only reschedule if profiling is not done */
+	if (spu_prof_running)
+		schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
+}
 
 /* Container for caching information about an active SPU task. */
 struct cached_info {
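For reference, here is a minimal, self-contained user-space sketch of the
one-slot-free head/tail test that spu_buff_add() uses to distinguish a full
ring from an empty one. It is illustrative only and not part of the patch;
ring_full() and RING_SIZE are invented names, with RING_SIZE standing in for
max_spu_buff.

#include <assert.h>
#include <stddef.h>

#define RING_SIZE 8	/* stands in for max_spu_buff */

/* Full when advancing head once more would make head equal tail. */
static int ring_full(size_t head, size_t tail)
{
	if (head >= tail)
		return (head - tail) >= (RING_SIZE - 1);
	return (tail - head) <= 1;
}

int main(void)
{
	assert(!ring_full(0, 0));		/* empty ring */
	assert(!ring_full(2, 4));		/* two slots free */
	assert(ring_full(3, 4));		/* head right behind tail */
	assert(ring_full(RING_SIZE - 1, 0));	/* last free slot kept open */
	return 0;
}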
@@ -305,14 +400,21 @@ static int process_context_switch(struct spu *spu, unsigned long objectId)
 
 	/* Record context info in event buffer */
 	spin_lock_irqsave(&buffer_lock, flags);
-	add_event_entry(ESCAPE_CODE);
-	add_event_entry(SPU_CTX_SWITCH_CODE);
-	add_event_entry(spu->number);
-	add_event_entry(spu->pid);
-	add_event_entry(spu->tgid);
-	add_event_entry(app_dcookie);
-	add_event_entry(spu_cookie);
-	add_event_entry(offset);
+	spu_buff_add(ESCAPE_CODE, spu->number);
+	spu_buff_add(SPU_CTX_SWITCH_CODE, spu->number);
+	spu_buff_add(spu->number, spu->number);
+	spu_buff_add(spu->pid, spu->number);
+	spu_buff_add(spu->tgid, spu->number);
+	spu_buff_add(app_dcookie, spu->number);
+	spu_buff_add(spu_cookie, spu->number);
+	spu_buff_add(offset, spu->number);
+
+	/* Set flag to indicate SPU PC data can now be written out.  If
+	 * the SPU program counter data is seen before an SPU context
+	 * record is seen, the postprocessing will fail.
+	 */
+	spu_buff[spu->number].ctx_sw_seen = 1;
+
 	spin_unlock_irqrestore(&buffer_lock, flags);
 	smp_wmb();	/* insure spu event buffer updates are written */
 	/* don't want entries intermingled... */
@@ -360,6 +462,47 @@ static int number_of_online_nodes(void)
 	return nodes;
 }
 
+static int oprofile_spu_buff_create(void)
+{
+	int spu;
+
+	max_spu_buff = oprofile_get_cpu_buffer_size();
+
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		/* create circular buffers to store the data in.
+		 * use locks to manage accessing the buffers
+		 */
+		spu_buff[spu].head = 0;
+		spu_buff[spu].tail = 0;
+
+		/*
+		 * Create a buffer for each SPU.  Can't reliably
+		 * create a single buffer for all spus due to not
+		 * enough contiguous kernel memory.
+		 */
+
+		spu_buff[spu].buff = kzalloc((max_spu_buff
+					      * sizeof(unsigned long)),
+					     GFP_KERNEL);
+
+		if (!spu_buff[spu].buff) {
+			printk(KERN_ERR "SPU_PROF: "
+			       "%s, line %d: oprofile_spu_buff_create "
+			       "failed to allocate spu buffer %d.\n",
+			       __func__, __LINE__, spu);
+
+			/* release the spu buffers that have been allocated */
+			while (spu >= 0) {
+				kfree(spu_buff[spu].buff);
+				spu_buff[spu].buff = 0;
+				spu--;
+			}
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
 /* The main purpose of this function is to synchronize
  * OProfile with SPUFS by registering to be notified of
  * SPU task switches.
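The allocation loop above frees everything it has set up so far when a
kzalloc() fails. A compact user-space sketch of that roll-back-on-failure
pattern follows; it is illustrative only and not part of the patch
(alloc_all(), free_all() and NUM_NODES are invented, and calloc()/free()
stand in for kzalloc()/kfree()):

#include <stdlib.h>

#define NUM_NODES 16	/* stands in for num_spu_nodes */

static unsigned long *bufs[NUM_NODES];

static int alloc_all(size_t entries)
{
	int i;

	for (i = 0; i < NUM_NODES; i++) {
		bufs[i] = calloc(entries, sizeof(unsigned long));
		if (!bufs[i]) {
			/* walk back over everything allocated so far,
			 * clearing the pointers as we go */
			while (i >= 0) {
				free(bufs[i]);	/* free(NULL) is a no-op */
				bufs[i] = NULL;
				i--;
			}
			return -1;	/* the patch returns -ENOMEM here */
		}
	}
	return 0;
}

static void free_all(void)
{
	int i;

	for (i = 0; i < NUM_NODES; i++) {
		free(bufs[i]);
		bufs[i] = NULL;
	}
}

int main(void)
{
	if (alloc_all(4096))
		return 1;
	free_all();
	return 0;
}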
@@ -372,20 +515,35 @@ static int number_of_online_nodes(void)
  */
 int spu_sync_start(void)
 {
-	int k;
+	int spu;
 	int ret = SKIP_GENERIC_SYNC;
 	int register_ret;
 	unsigned long flags = 0;
 
 	spu_prof_num_nodes = number_of_online_nodes();
 	num_spu_nodes = spu_prof_num_nodes * 8;
+	INIT_DELAYED_WORK(&spu_work, wq_sync_spu_buff);
+
+	/* create buffer for storing the SPU data to put in
+	 * the kernel buffer.
+	 */
+	ret = oprofile_spu_buff_create();
+	if (ret)
+		goto out;
 
 	spin_lock_irqsave(&buffer_lock, flags);
-	add_event_entry(ESCAPE_CODE);
-	add_event_entry(SPU_PROFILING_CODE);
-	add_event_entry(num_spu_nodes);
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		spu_buff_add(ESCAPE_CODE, spu);
+		spu_buff_add(SPU_PROFILING_CODE, spu);
+		spu_buff_add(num_spu_nodes, spu);
+	}
 	spin_unlock_irqrestore(&buffer_lock, flags);
 
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		spu_buff[spu].ctx_sw_seen = 0;
+		spu_buff[spu].last_guard_val = 0;
+	}
+
 	/* Register for SPU events */
 	register_ret = spu_switch_event_register(&spu_active);
 	if (register_ret) {
@@ -393,8 +551,6 @@ int spu_sync_start(void)
 		goto out;
 	}
 
-	for (k = 0; k < (MAX_NUMNODES * 8); k++)
-		last_guard_val[k] = 0;
 	pr_debug("spu_sync_start -- running.\n");
 out:
 	return ret;
@@ -446,13 +602,20 @@ void spu_sync_buffer(int spu_num, unsigned int *samples,
 		 * use.  We need to discard samples taken during the time
 		 * period which an overlay occurs (i.e., guard value changes).
 		 */
-		if (grd_val && grd_val != last_guard_val[spu_num]) {
-			last_guard_val[spu_num] = grd_val;
+		if (grd_val && grd_val != spu_buff[spu_num].last_guard_val) {
+			spu_buff[spu_num].last_guard_val = grd_val;
 			/* Drop the rest of the samples. */
 			break;
 		}
 
-		add_event_entry(file_offset | spu_num_shifted);
+		/* We must ensure that the SPU context switch has been written
+		 * out before samples for the SPU.  Otherwise, the SPU context
+		 * information is not available and the postprocessing of the
+		 * SPU PC will fail with no available anonymous map information.
+		 */
+		if (spu_buff[spu_num].ctx_sw_seen)
+			spu_buff_add((file_offset | spu_num_shifted),
+				     spu_num);
 	}
 	spin_unlock(&buffer_lock);
 out:
@@ -463,20 +626,41 @@ out:
 int spu_sync_stop(void)
 {
 	unsigned long flags = 0;
-	int ret = spu_switch_event_unregister(&spu_active);
-	if (ret) {
+	int ret;
+	int k;
+
+	ret = spu_switch_event_unregister(&spu_active);
+
+	if (ret)
 		printk(KERN_ERR "SPU_PROF: "
-		       "%s, line %d: spu_switch_event_unregister returned %d\n",
-		       __func__, __LINE__, ret);
-		goto out;
-	}
+		       "%s, line %d: spu_switch_event_unregister "	\
+		       "returned %d\n",
+		       __func__, __LINE__, ret);
+
+	/* flush any remaining data in the per SPU buffers */
+	sync_spu_buff();
 
 	spin_lock_irqsave(&cache_lock, flags);
 	ret = release_cached_info(RELEASE_ALL);
 	spin_unlock_irqrestore(&cache_lock, flags);
-out:
+
+	/* remove scheduled work queue item rather than waiting
+	 * for every queued entry to execute.  Then flush pending
+	 * system wide buffer to event buffer.
+	 */
+	cancel_delayed_work(&spu_work);
+
+	for (k = 0; k < num_spu_nodes; k++) {
+		spu_buff[k].ctx_sw_seen = 0;
+
+		/*
+		 * spu_buff[k].buff will be NULL if there was a problem
+		 * allocating the buffer.  kfree of NULL is safe, so
+		 * free it unconditionally and clear the pointer.
+		 */
+		kfree(spu_buff[k].buff);
+		spu_buff[k].buff = 0;
+	}
 	pr_debug("spu_sync_stop -- done.\n");
 	return ret;
 }
 
-