|
@@ -39,7 +39,21 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
|
|
|
{
|
|
|
char *p, *base;
|
|
|
size_t step_size, load_count;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * On TILEPro the striping granularity is a fixed 8KB; on
|
|
|
+ * TILE-Gx it is configurable, and we rely on the fact that
|
|
|
+ * the hypervisor always configures maximum striping.  That makes
|
|
|
+ * bits 9 and 10 of the PA part of the stripe function, so we
|
|
|
+ * hit a striping boundary every 512 bytes.
|
|
|
+ *
|
|
|
+ */
|
|
|
+#ifdef __tilegx__
|
|
|
+ const unsigned long STRIPE_WIDTH = 512;
|
|
|
+#else
|
|
|
const unsigned long STRIPE_WIDTH = 8192;
|
|
|
+#endif
|
|
|
+
|
|
|
#ifdef __tilegx__
|
|
|
/*
|
|
|
* On TILE-Gx, we must disable the dstream prefetcher before doing
|
|
@@ -74,7 +88,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
|
|
|
* memory, that one load would be sufficient, but since we may
|
|
|
* be, we also need to back up to the last load issued to
|
|
|
* another memory controller, which would be the point where
|
|
|
- * we crossed an 8KB boundary (the granularity of striping
|
|
|
+ * we crossed a "striping" boundary (the granularity of striping
|
|
|
* across memory controllers). Keep backing up and doing this
|
|
|
* until we are before the beginning of the buffer, or have
|
|
|
* hit all the controllers.
|
|
@@ -88,12 +102,22 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
|
|
|
* every cache line on a full memory stripe on each
|
|
|
* controller" that we simply do that, to simplify the logic.
|
|
|
*
|
|
|
- * FIXME: See bug 9535 for some issues with this code.
|
|
|
+ * On TILE-Gx the hash-for-home function is much more complex,
|
|
|
+ * with the upshot being we can't readily guarantee we have
|
|
|
+ * hit both entries in the 128-entry AMT that were hit by any
|
|
|
+ * load in the entire range, so we just re-load them all.
|
|
|
+ * With larger buffers, we may want to consider using a hypervisor
|
|
|
+ * trap to issue loads directly to each hash-for-home tile for
|
|
|
+ * each controller (doing it from Linux would trash the TLB).
|
|
|
*/
|
|
|
if (hfh) {
|
|
|
step_size = L2_CACHE_BYTES;
|
|
|
+#ifdef __tilegx__
|
|
|
+ load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES;
|
|
|
+#else
|
|
|
load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) *
|
|
|
(1 << CHIP_LOG_NUM_MSHIMS());
|
|
|
+#endif
|
|
|
} else {
|
|
|
step_size = STRIPE_WIDTH;
|
|
|
load_count = (1 << CHIP_LOG_NUM_MSHIMS());
|