@@ -25,6 +25,16 @@
 #include "selftest.h"
 #include "workarounds.h"
 
+/* IRQ latency can be enormous because:
+ * - All IRQs may be disabled on a CPU for a *long* time by e.g. a
+ *   slow serial console or an old IDE driver doing error recovery
+ * - The PREEMPT_RT patches mostly deal with this, but also allow a
+ *   tasklet or normal task to be given higher priority than our IRQ
+ *   threads
+ * Try to avoid blaming the hardware for this.
+ */
+#define IRQ_TIMEOUT HZ
+
 /*
  * Loopback test packet structure
  *
@@ -77,6 +87,9 @@ struct efx_loopback_state {
	struct efx_loopback_payload payload;
 };
 
+/* How long to wait for all the packets to arrive (in ms) */
+#define LOOPBACK_TIMEOUT_MS 1000
+
 /**************************************************************************
  *
  * MII, NVRAM and register tests
@@ -130,23 +143,25 @@ static int efx_test_chip(struct efx_nic *efx, struct efx_self_tests *tests)
 static int efx_test_interrupts(struct efx_nic *efx,
			       struct efx_self_tests *tests)
 {
+	unsigned long timeout, wait;
	int cpu;
 
	netif_dbg(efx, drv, efx->net_dev, "testing interrupts\n");
	tests->interrupt = -1;
 
-	/* Reset interrupt flag */
-	efx->last_irq_cpu = -1;
-	smp_wmb();
-
-	efx_nic_generate_interrupt(efx);
+	efx_nic_irq_test_start(efx);
+	timeout = jiffies + IRQ_TIMEOUT;
+	wait = 1;
 
	/* Wait for arrival of test interrupt. */
	netif_dbg(efx, drv, efx->net_dev, "waiting for test interrupt\n");
-	schedule_timeout_uninterruptible(HZ / 10);
-	cpu = ACCESS_ONCE(efx->last_irq_cpu);
-	if (cpu >= 0)
-		goto success;
+	do {
+		schedule_timeout_uninterruptible(wait);
+		cpu = efx_nic_irq_test_irq_cpu(efx);
+		if (cpu >= 0)
+			goto success;
+		wait *= 2;
+	} while (time_before(jiffies, timeout));
 
	netif_err(efx, drv, efx->net_dev, "timed out waiting for interrupt\n");
	return -ETIMEDOUT;
@@ -159,61 +174,86 @@ static int efx_test_interrupts(struct efx_nic *efx,
 }
 
 /* Test generation and receipt of interrupting events */
-static int efx_test_eventq_irq(struct efx_channel *channel,
+static int efx_test_eventq_irq(struct efx_nic *efx,
			       struct efx_self_tests *tests)
 {
-	struct efx_nic *efx = channel->efx;
-	unsigned int read_ptr;
-	bool napi_ran, dma_seen, int_seen;
+	struct efx_channel *channel;
+	unsigned int read_ptr[EFX_MAX_CHANNELS];
+	unsigned long napi_ran = 0, dma_pend = 0, int_pend = 0;
+	unsigned long timeout, wait;
 
-	read_ptr = channel->eventq_read_ptr;
-	channel->last_irq_cpu = -1;
-	smp_wmb();
+	BUILD_BUG_ON(EFX_MAX_CHANNELS > BITS_PER_LONG);
 
-	efx_nic_generate_test_event(channel);
+	efx_for_each_channel(channel, efx) {
+		read_ptr[channel->channel] = channel->eventq_read_ptr;
+		set_bit(channel->channel, &dma_pend);
+		set_bit(channel->channel, &int_pend);
+		efx_nic_event_test_start(channel);
+	}
 
-	/* Wait for arrival of interrupt.  NAPI processing may or may
+	timeout = jiffies + IRQ_TIMEOUT;
+	wait = 1;
+
+	/* Wait for arrival of interrupts.  NAPI processing may or may
	 * not complete in time, but we can cope in any case.
	 */
-	msleep(10);
-	napi_disable(&channel->napi_str);
-	if (channel->eventq_read_ptr != read_ptr) {
-		napi_ran = true;
-		dma_seen = true;
-		int_seen = true;
-	} else {
-		napi_ran = false;
-		dma_seen = efx_nic_event_present(channel);
-		int_seen = ACCESS_ONCE(channel->last_irq_cpu) >= 0;
-	}
-	napi_enable(&channel->napi_str);
-	efx_nic_eventq_read_ack(channel);
+	do {
+		schedule_timeout_uninterruptible(wait);
+
+		efx_for_each_channel(channel, efx) {
+			napi_disable(&channel->napi_str);
+			if (channel->eventq_read_ptr !=
+			    read_ptr[channel->channel]) {
+				set_bit(channel->channel, &napi_ran);
+				clear_bit(channel->channel, &dma_pend);
+				clear_bit(channel->channel, &int_pend);
+			} else {
+				if (efx_nic_event_present(channel))
+					clear_bit(channel->channel, &dma_pend);
+				if (efx_nic_event_test_irq_cpu(channel) >= 0)
+					clear_bit(channel->channel, &int_pend);
+			}
+			napi_enable(&channel->napi_str);
+			efx_nic_eventq_read_ack(channel);
+		}
 
-	tests->eventq_dma[channel->channel] = dma_seen ? 1 : -1;
-	tests->eventq_int[channel->channel] = int_seen ? 1 : -1;
+		wait *= 2;
+	} while ((dma_pend || int_pend) && time_before(jiffies, timeout));
 
-	if (dma_seen && int_seen) {
-		netif_dbg(efx, drv, efx->net_dev,
-			  "channel %d event queue passed (with%s NAPI)\n",
-			  channel->channel, napi_ran ? "" : "out");
-		return 0;
-	} else {
-		/* Report failure and whether either interrupt or DMA worked */
-		netif_err(efx, drv, efx->net_dev,
-			  "channel %d timed out waiting for event queue\n",
-			  channel->channel);
-		if (int_seen)
-			netif_err(efx, drv, efx->net_dev,
-				  "channel %d saw interrupt "
-				  "during event queue test\n",
-				  channel->channel);
-		if (dma_seen)
+	efx_for_each_channel(channel, efx) {
+		bool dma_seen = !test_bit(channel->channel, &dma_pend);
+		bool int_seen = !test_bit(channel->channel, &int_pend);
+
+		tests->eventq_dma[channel->channel] = dma_seen ? 1 : -1;
+		tests->eventq_int[channel->channel] = int_seen ? 1 : -1;
+
+		if (dma_seen && int_seen) {
+			netif_dbg(efx, drv, efx->net_dev,
+				  "channel %d event queue passed (with%s NAPI)\n",
+				  channel->channel,
+				  test_bit(channel->channel, &napi_ran) ?
+				  "" : "out");
+		} else {
+			/* Report failure and whether either interrupt or DMA
+			 * worked
+			 */
			netif_err(efx, drv, efx->net_dev,
-				  "channel %d event was generated, but "
-				  "failed to trigger an interrupt\n",
+				  "channel %d timed out waiting for event queue\n",
				  channel->channel);
-		return -ETIMEDOUT;
+			if (int_seen)
+				netif_err(efx, drv, efx->net_dev,
+					  "channel %d saw interrupt "
+					  "during event queue test\n",
+					  channel->channel);
+			if (dma_seen)
+				netif_err(efx, drv, efx->net_dev,
+					  "channel %d event was generated, but "
+					  "failed to trigger an interrupt\n",
+					  channel->channel);
+		}
	}
+
+	return (dma_pend || int_pend) ? -ETIMEDOUT : 0;
 }
 
 static int efx_test_phy(struct efx_nic *efx, struct efx_self_tests *tests,
@@ -516,10 +556,10 @@ efx_test_loopback(struct efx_tx_queue *tx_queue,
	begin_rc = efx_begin_loopback(tx_queue);
 
	/* This will normally complete very quickly, but be
-	 * prepared to wait up to 100 ms. */
+	 * prepared to wait much longer. */
	msleep(1);
	if (!efx_poll_loopback(efx)) {
-		msleep(100);
+		msleep(LOOPBACK_TIMEOUT_MS);
		efx_poll_loopback(efx);
	}
 
@@ -660,9 +700,10 @@ int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests,
	enum efx_loopback_mode loopback_mode = efx->loopback_mode;
	int phy_mode = efx->phy_mode;
	enum reset_type reset_method = RESET_TYPE_INVISIBLE;
-	struct efx_channel *channel;
	int rc_test = 0, rc_reset = 0, rc;
 
+	efx_selftest_async_cancel(efx);
+
	/* Online (i.e. non-disruptive) testing
	 * This checks interrupt generation, event delivery and PHY presence. */
 
@@ -678,11 +719,9 @@ int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests,
	if (rc && !rc_test)
		rc_test = rc;
 
-	efx_for_each_channel(channel, efx) {
-		rc = efx_test_eventq_irq(channel, tests);
-		if (rc && !rc_test)
-			rc_test = rc;
-	}
+	rc = efx_test_eventq_irq(efx, tests);
+	if (rc && !rc_test)
+		rc_test = rc;
 
	if (rc_test)
		return rc_test;
@@ -757,3 +796,36 @@ int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests,
 
	return rc_test;
 }
+
+void efx_selftest_async_start(struct efx_nic *efx)
+{
+	struct efx_channel *channel;
+
+	efx_for_each_channel(channel, efx)
+		efx_nic_event_test_start(channel);
+	schedule_delayed_work(&efx->selftest_work, IRQ_TIMEOUT);
+}
+
+void efx_selftest_async_cancel(struct efx_nic *efx)
+{
+	cancel_delayed_work_sync(&efx->selftest_work);
+}
+
+void efx_selftest_async_work(struct work_struct *data)
+{
+	struct efx_nic *efx = container_of(data, struct efx_nic,
+					   selftest_work.work);
+	struct efx_channel *channel;
+	int cpu;
+
+	efx_for_each_channel(channel, efx) {
+		cpu = efx_nic_event_test_irq_cpu(channel);
+		if (cpu < 0)
+			netif_err(efx, ifup, efx->net_dev,
+				  "channel %d failed to trigger an interrupt\n",
+				  channel->channel);
+		else
+			netif_dbg(efx, ifup, efx->net_dev,
+				  "channel %d triggered interrupt on CPU %d\n",
+				  channel->channel, cpu);
+	}
+}