17 years ago · 4460a860f7
--- a/drivers/staging/echo/bit_operations.h
+++ b/drivers/staging/echo/bit_operations.h
@@ -36,14 +36,15 @@
 
				     \return The bit number of the highest set bit, or -1 if the word is zero. */
			
 
				 static __inline__ int top_bit(unsigned int bits)
			
 
				 {
			
 
				-    int res;
			
 
				-
			
 
				-    __asm__ (" xorl %[res],%[res];\n"
			
 
				-             " decl %[res];\n"
			
 
				-             " bsrl %[bits],%[res]\n"
			
 
				-             : [res] "=&r" (res)
			
 
				-             : [bits] "rm" (bits));
			
 
				-    return res;
			
 
				+	int res;
			
 
				+
			
 
				+	__asm__(" xorl %[res],%[res];\n"
			
 
				+		" decl %[res];\n"
			
 
				+		" bsrl %[bits],%[res]\n"
			
 
				+		:[res] "=&r" (res)
			
 
				+		:[bits] "rm"(bits)
			
 
				+	);
			
 
				+	return res;
			
 
				 }
			
 
				 
			
 
				 /*! \brief Find the bit position of the lowest set bit in a word
			
@@ -51,84 +52,75 @@ static __inline__ int top_bit(unsigned int bits)
 
				     \return The bit number of the lowest set bit, or -1 if the word is zero. */
			
 
				 static __inline__ int bottom_bit(unsigned int bits)
			
 
				 {
			
 
				-    int res;
			
 
				-
			
 
				-    __asm__ (" xorl %[res],%[res];\n"
			
 
				-             " decl %[res];\n"
			
 
				-             " bsfl %[bits],%[res]\n"
			
 
				-             : [res] "=&r" (res)
			
 
				-             : [bits] "rm" (bits));
			
 
				-    return res;
			
 
				+	int res;
			
 
				+
			
 
				+	__asm__(" xorl %[res],%[res];\n"
			
 
				+		" decl %[res];\n"
			
 
				+		" bsfl %[bits],%[res]\n"
			
 
				+		:[res] "=&r" (res)
			
 
				+		:[bits] "rm"(bits)
			
 
				+	);
			
 
				+	return res;
			
 
				 }
			
 
				 #else
			
 
				 static __inline__ int top_bit(unsigned int bits)
			
 
				 {
			
 
				-    int i;
			
 
				-
			
 
				-    if (bits == 0)
			
 
				-        return -1;
			
 
				-    i = 0;
			
 
				-    if (bits & 0xFFFF0000)
			
 
				-    {
			
 
				-        bits &= 0xFFFF0000;
			
 
				-        i += 16;
			
 
				-    }
			
 
				-    if (bits & 0xFF00FF00)
			
 
				-    {
			
 
				-        bits &= 0xFF00FF00;
			
 
				-        i += 8;
			
 
				-    }
			
 
				-    if (bits & 0xF0F0F0F0)
			
 
				-    {
			
 
				-        bits &= 0xF0F0F0F0;
			
 
				-        i += 4;
			
 
				-    }
			
 
				-    if (bits & 0xCCCCCCCC)
			
 
				-    {
			
 
				-        bits &= 0xCCCCCCCC;
			
 
				-        i += 2;
			
 
				-    }
			
 
				-    if (bits & 0xAAAAAAAA)
			
 
				-    {
			
 
				-        bits &= 0xAAAAAAAA;
			
 
				-        i += 1;
			
 
				-    }
			
 
				-    return i;
			
 
				+	int i;
			
 
				+
			
 
				+	if (bits == 0)
			
 
				+		return -1;
			
 
				+	i = 0;
			
 
				+	if (bits & 0xFFFF0000) {
			
 
				+		bits &= 0xFFFF0000;
			
 
				+		i += 16;
			
 
				+	}
			
 
				+	if (bits & 0xFF00FF00) {
			
 
				+		bits &= 0xFF00FF00;
			
 
				+		i += 8;
			
 
				+	}
			
 
				+	if (bits & 0xF0F0F0F0) {
			
 
				+		bits &= 0xF0F0F0F0;
			
 
				+		i += 4;
			
 
				+	}
			
 
				+	if (bits & 0xCCCCCCCC) {
			
 
				+		bits &= 0xCCCCCCCC;
			
 
				+		i += 2;
			
 
				+	}
			
 
				+	if (bits & 0xAAAAAAAA) {
			
 
				+		bits &= 0xAAAAAAAA;
			
 
				+		i += 1;
			
 
				+	}
			
 
				+	return i;
			
 
				 }
			
 
				 
			
 
				 static __inline__ int bottom_bit(unsigned int bits)
			
 
				 {
			
 
				-    int i;
			
 
				-
			
 
				-    if (bits == 0)
			
 
				-        return -1;
			
 
				-    i = 32;
			
 
				-    if (bits & 0x0000FFFF)
			
 
				-    {
			
 
				-        bits &= 0x0000FFFF;
			
 
				-        i -= 16;
			
 
				-    }
			
 
				-    if (bits & 0x00FF00FF)
			
 
				-    {
			
 
				-        bits &= 0x00FF00FF;
			
 
				-        i -= 8;
			
 
				-    }
			
 
				-    if (bits & 0x0F0F0F0F)
			
 
				-    {
			
 
				-        bits &= 0x0F0F0F0F;
			
 
				-        i -= 4;
			
 
				-    }
			
 
				-    if (bits & 0x33333333)
			
 
				-    {
			
 
				-        bits &= 0x33333333;
			
 
				-        i -= 2;
			
 
				-    }
			
 
				-    if (bits & 0x55555555)
			
 
				-    {
			
 
				-        bits &= 0x55555555;
			
 
				-        i -= 1;
			
 
				-    }
			
 
				-    return i;
			
 
				+	int i;
			
 
				+
			
 
				+	if (bits == 0)
			
 
				+		return -1;
			
 
				+	i = 32;
			
 
				+	if (bits & 0x0000FFFF) {
			
 
				+		bits &= 0x0000FFFF;
			
 
				+		i -= 16;
			
 
				+	}
			
 
				+	if (bits & 0x00FF00FF) {
			
 
				+		bits &= 0x00FF00FF;
			
 
				+		i -= 8;
			
 
				+	}
			
 
				+	if (bits & 0x0F0F0F0F) {
			
 
				+		bits &= 0x0F0F0F0F;
			
 
				+		i -= 4;
			
 
				+	}
			
 
				+	if (bits & 0x33333333) {
			
 
				+		bits &= 0x33333333;
			
 
				+		i -= 2;
			
 
				+	}
			
 
				+	if (bits & 0x55555555) {
			
 
				+		bits &= 0x55555555;
			
 
				+		i -= 1;
			
 
				+	}
			
 
				+	return i;
			
 
				 }
			
 
				 #endif
			
 
				 
			
@@ -138,13 +130,14 @@ static __inline__ int bottom_bit(unsigned int bits)
 
				 static __inline__ uint8_t bit_reverse8(uint8_t x)
			
 
				 {
			
 
				 #if defined(__i386__)  ||  defined(__x86_64__)
			
 
				-    /* If multiply is fast */
			
 
				-    return ((x*0x0802U & 0x22110U) | (x*0x8020U & 0x88440U))*0x10101U >> 16;
			
 
				+	/* If multiply is fast */
			
 
				+	return ((x * 0x0802U & 0x22110U) | (x * 0x8020U & 0x88440U)) *
			
 
				+	    0x10101U >> 16;
			
 
				 #else
			
 
				-    /* If multiply is slow, but we have a barrel shifter */
			
 
				-    x = (x >> 4) | (x << 4);
			
 
				-    x = ((x & 0xCC) >> 2) | ((x & 0x33) << 2);
			
 
				-    return ((x & 0xAA) >> 1) | ((x & 0x55) << 1);
			
 
				+	/* If multiply is slow, but we have a barrel shifter */
			
 
				+	x = (x >> 4) | (x << 4);
			
 
				+	x = ((x & 0xCC) >> 2) | ((x & 0x33) << 2);
			
 
				+	return ((x & 0xAA) >> 1) | ((x & 0x55) << 1);
			
 
				 #endif
			
 
				 }
			
 
				 
			
@@ -184,7 +177,7 @@ uint16_t make_mask16(uint16_t x);
 
				     \return The word with the single set bit. */
			
 
				 static __inline__ uint32_t least_significant_one32(uint32_t x)
			
 
				 {
			
 
				-    return (x & (-(int32_t) x));
			
 
				+	return (x & (-(int32_t) x));
			
 
				 }
			
 
				 
			
 
				 /*! \brief Find the most significant one in a word, and return a word
			
@@ -194,10 +187,10 @@ static __inline__ uint32_t least_significant_one32(uint32_t x)
 
				 static __inline__ uint32_t most_significant_one32(uint32_t x)
			
 
				 {
			
 
				 #if defined(__i386__)  ||  defined(__x86_64__)
			
 
				-    return 1 << top_bit(x);
			
 
				+	return 1 << top_bit(x);
			
 
				 #else
			
 
				-    x = make_mask32(x);
			
 
				-    return (x ^ (x >> 1));
			
 
				+	x = make_mask32(x);
			
 
				+	return (x ^ (x >> 1));
			
 
				 #endif
			
 
				 }
			
 
				 
			
@@ -206,8 +199,8 @@ static __inline__ uint32_t most_significant_one32(uint32_t x)
 
				     \return 1 for odd, or 0 for even. */
			
 
				 static __inline__ int parity8(uint8_t x)
			
 
				 {
			
 
				-    x = (x ^ (x >> 4)) & 0x0F;
			
 
				-    return (0x6996 >> x) & 1;
			
 
				+	x = (x ^ (x >> 4)) & 0x0F;
			
 
				+	return (0x6996 >> x) & 1;
			
 
				 }
			
 
				 
			
 
				 /*! \brief Find the parity of a 16 bit word.
			
@@ -215,9 +208,9 @@ static __inline__ int parity8(uint8_t x)
 
				     \return 1 for odd, or 0 for even. */
			
 
				 static __inline__ int parity16(uint16_t x)
			
 
				 {
			
 
				-    x ^= (x >> 8);
			
 
				-    x = (x ^ (x >> 4)) & 0x0F;
			
 
				-    return (0x6996 >> x) & 1;
			
 
				+	x ^= (x >> 8);
			
 
				+	x = (x ^ (x >> 4)) & 0x0F;
			
 
				+	return (0x6996 >> x) & 1;
			
 
				 }
			
 
				 
			
 
				 /*! \brief Find the parity of a 32 bit word.
			
@@ -225,10 +218,10 @@ static __inline__ int parity16(uint16_t x)
 
				     \return 1 for odd, or 0 for even. */
			
 
				 static __inline__ int parity32(uint32_t x)
			
 
				 {
			
 
				-    x ^= (x >> 16);
			
 
				-    x ^= (x >> 8);
			
 
				-    x = (x ^ (x >> 4)) & 0x0F;
			
 
				-    return (0x6996 >> x) & 1;
			
 
				+	x ^= (x >> 16);
			
 
				+	x ^= (x >> 8);
			
 
				+	x = (x ^ (x >> 4)) & 0x0F;
			
 
				+	return (0x6996 >> x) & 1;
			
 
				 }
			
 
				 
			
 
				 #endif
			
--- a/drivers/staging/echo/echo.c
+++ b/drivers/staging/echo/echo.c
@@ -74,7 +74,6 @@
 
				 
			
 
				    Steve also has some nice notes on echo cancellers in echo.h
			
 
				 
			
 
				-
			
 
				    References:
			
 
				 
			
 
				    [1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo
			
@@ -105,7 +104,7 @@
 
				    Mark, Pawel, and Pavel.
			
 
				 */
			
 
				 
			
 
				-#include <linux/kernel.h>       /* We're doing kernel work */
			
 
				+#include <linux/kernel.h>	/* We're doing kernel work */
			
 
				 #include <linux/module.h>
			
 
				 #include <linux/kernel.h>
			
 
				 #include <linux/slab.h>
			
@@ -115,8 +114,8 @@
 
				 
			
 
				 #define MIN_TX_POWER_FOR_ADAPTION   64
			
 
				 #define MIN_RX_POWER_FOR_ADAPTION   64
			
 
				-#define DTD_HANGOVER               600     /* 600 samples, or 75ms     */
			
 
				-#define DC_LOG2BETA                  3     /* log2() of DC filter Beta */
			
 
				+#define DTD_HANGOVER               600	/* 600 samples, or 75ms     */
			
 
				+#define DC_LOG2BETA                  3	/* log2() of DC filter Beta */
			
 
				 
			
 
				 /*-----------------------------------------------------------------------*\
			
 
				                                FUNCTIONS
			
@@ -124,59 +123,58 @@
 
				 
			
 
				 /* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */
			
 
				 
			
 
				-
			
 
				 #ifdef __bfin__
			
 
				-static void __inline__ lms_adapt_bg(struct oslec_state *ec, int clean, int shift)
			
 
				+static void __inline__ lms_adapt_bg(struct oslec_state *ec, int clean,
			
 
				+				    int shift)
			
 
				 {
			
 
				-    int i, j;
			
 
				-    int offset1;
			
 
				-    int offset2;
			
 
				-    int factor;
			
 
				-    int exp;
			
 
				-    int16_t *phist;
			
 
				-    int n;
			
 
				-
			
 
				-    if (shift > 0)
			
 
				-	factor = clean << shift;
			
 
				-    else
			
 
				-	factor = clean >> -shift;
			
 
				-
			
 
				-    /* Update the FIR taps */
			
 
				-
			
 
				-    offset2 = ec->curr_pos;
			
 
				-    offset1 = ec->taps - offset2;
			
 
				-    phist = &ec->fir_state_bg.history[offset2];
			
 
				-
			
 
				-    /* st: and en: help us locate the assembler in echo.s */
			
 
				-
			
 
				-    //asm("st:");
			
 
				-    n = ec->taps;
			
 
				-    for (i = 0, j = offset2;  i < n;  i++, j++)
			
 
				-    {
			
 
				-       exp = *phist++ * factor;
			
 
				-       ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
			
 
				-    }
			
 
				-    //asm("en:");
			
 
				-
			
 
				-    /* Note the asm for the inner loop above generated by Blackfin gcc
			
 
				-       4.1.1 is pretty good (note even parallel instructions used):
			
 
				-
			
 
				-    	R0 = W [P0++] (X);
			
 
				-	R0 *= R2;
			
 
				-	R0 = R0 + R3 (NS) ||
			
 
				-	R1 = W [P1] (X) ||
			
 
				-	nop;
			
 
				-	R0 >>>= 15;
			
 
				-	R0 = R0 + R1;
			
 
				-	W [P1++] = R0;
			
 
				-
			
 
				-	A block based update algorithm would be much faster but the
			
 
				-	above can't be improved on much.  Every instruction saved in
			
 
				-	the loop above is 2 MIPs/ch!  The for loop above is where the
			
 
				-	Blackfin spends most of it's time - about 17 MIPs/ch measured
			
 
				-	with speedtest.c with 256 taps (32ms).  Write-back and
			
 
				-	Write-through cache gave about the same performance.
			
 
				-    */
			
 
				+	int i, j;
			
 
				+	int offset1;
			
 
				+	int offset2;
			
 
				+	int factor;
			
 
				+	int exp;
			
 
				+	int16_t *phist;
			
 
				+	int n;
			
 
				+
			
 
				+	if (shift > 0)
			
 
				+		factor = clean << shift;
			
 
				+	else
			
 
				+		factor = clean >> -shift;
			
 
				+
			
 
				+	/* Update the FIR taps */
			
 
				+
			
 
				+	offset2 = ec->curr_pos;
			
 
				+	offset1 = ec->taps - offset2;
			
 
				+	phist = &ec->fir_state_bg.history[offset2];
			
 
				+
			
 
				+	/* st: and en: help us locate the assembler in echo.s */
			
 
				+
			
 
				+	//asm("st:");
			
 
				+	n = ec->taps;
			
 
				+	for (i = 0, j = offset2; i < n; i++, j++) {
			
 
				+		exp = *phist++ * factor;
			
 
				+		ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
			
 
				+	}
			
 
				+	//asm("en:");
			
 
				+
			
 
				+	/* Note the asm for the inner loop above generated by Blackfin gcc
			
 
				+	   4.1.1 is pretty good (note even parallel instructions used):
			
 
				+
			
 
				+	   R0 = W [P0++] (X);
			
 
				+	   R0 *= R2;
			
 
				+	   R0 = R0 + R3 (NS) ||
			
 
				+	   R1 = W [P1] (X) ||
			
 
				+	   nop;
			
 
				+	   R0 >>>= 15;
			
 
				+	   R0 = R0 + R1;
			
 
				+	   W [P1++] = R0;
			
 
				+
			
 
				+	   A block based update algorithm would be much faster but the
			
 
				+	   above can't be improved on much.  Every instruction saved in
			
 
				+	   the loop above is 2 MIPs/ch!  The for loop above is where the
			
 
				+	   Blackfin spends most of its time - about 17 MIPs/ch measured
			
 
				+	   with speedtest.c with 256 taps (32ms).  Write-back and
			
 
				+	   Write-through cache gave about the same performance.
			
 
				+	 */
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -198,94 +196,90 @@ static void __inline__ lms_adapt_bg(struct oslec_state *ec, int clean, int shift
 
				 */
			
 
				 
			
 
				 #else
			
 
				-static __inline__ void lms_adapt_bg(struct oslec_state *ec, int clean, int shift)
			
 
				+static __inline__ void lms_adapt_bg(struct oslec_state *ec, int clean,
			
 
				+				    int shift)
			
 
				 {
			
 
				-    int i;
			
 
				-
			
 
				-    int offset1;
			
 
				-    int offset2;
			
 
				-    int factor;
			
 
				-    int exp;
			
 
				-
			
 
				-    if (shift > 0)
			
 
				-	factor = clean << shift;
			
 
				-    else
			
 
				-	factor = clean >> -shift;
			
 
				-
			
 
				-    /* Update the FIR taps */
			
 
				-
			
 
				-    offset2 = ec->curr_pos;
			
 
				-    offset1 = ec->taps - offset2;
			
 
				-
			
 
				-    for (i = ec->taps - 1;  i >= offset1;  i--)
			
 
				-    {
			
 
				-       exp = (ec->fir_state_bg.history[i - offset1]*factor);
			
 
				-       ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
			
 
				-    }
			
 
				-    for (  ;  i >= 0;  i--)
			
 
				-    {
			
 
				-       exp = (ec->fir_state_bg.history[i + offset2]*factor);
			
 
				-       ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
			
 
				-    }
			
 
				+	int i;
			
 
				+
			
 
				+	int offset1;
			
 
				+	int offset2;
			
 
				+	int factor;
			
 
				+	int exp;
			
 
				+
			
 
				+	if (shift > 0)
			
 
				+		factor = clean << shift;
			
 
				+	else
			
 
				+		factor = clean >> -shift;
			
 
				+
			
 
				+	/* Update the FIR taps */
			
 
				+
			
 
				+	offset2 = ec->curr_pos;
			
 
				+	offset1 = ec->taps - offset2;
			
 
				+
			
 
				+	for (i = ec->taps - 1; i >= offset1; i--) {
			
 
				+		exp = (ec->fir_state_bg.history[i - offset1] * factor);
			
 
				+		ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
			
 
				+	}
			
 
				+	for (; i >= 0; i--) {
			
 
				+		exp = (ec->fir_state_bg.history[i + offset2] * factor);
			
 
				+		ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
			
 
				+	}
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-
			
 
				 struct oslec_state *oslec_create(int len, int adaption_mode)
			
 
				 {
			
 
				-    struct oslec_state *ec;
			
 
				-    int i;
			
 
				-
			
 
				-    ec = kzalloc(sizeof(*ec), GFP_KERNEL);
			
 
				-    if (!ec)
			
 
				-        return NULL;
			
 
				-
			
 
				-    ec->taps = len;
			
 
				-    ec->log2taps = top_bit(len);
			
 
				-    ec->curr_pos = ec->taps - 1;
			
 
				-
			
 
				-    for (i = 0; i < 2; i++) {
			
 
				-        ec->fir_taps16[i] = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
			
 
				-        if (!ec->fir_taps16[i])
			
 
				-	    goto error_oom;
			
 
				-    }
			
 
				-
			
 
				-    fir16_create(&ec->fir_state,
			
 
				-                 ec->fir_taps16[0],
			
 
				-                 ec->taps);
			
 
				-    fir16_create(&ec->fir_state_bg,
			
 
				-                 ec->fir_taps16[1],
			
 
				-                 ec->taps);
			
 
				-
			
 
				-    for(i=0; i<5; i++) {
			
 
				-      ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0;
			
 
				-    }
			
 
				-
			
 
				-    ec->cng_level = 1000;
			
 
				-    oslec_adaption_mode(ec, adaption_mode);
			
 
				-
			
 
				-    ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
			
 
				-    if (!ec->snapshot)
			
 
				-        goto error_oom;
			
 
				-
			
 
				-    ec->cond_met = 0;
			
 
				-    ec->Pstates = 0;
			
 
				-    ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
			
 
				-    ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
			
 
				-    ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
			
 
				-    ec->Lbgn = ec->Lbgn_acc = 0;
			
 
				-    ec->Lbgn_upper = 200;
			
 
				-    ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
			
 
				-
			
 
				-    return  ec;
			
 
				-
			
 
				-error_oom:
			
 
				-    for (i = 0; i < 2; i++)
			
 
				-        kfree(ec->fir_taps16[i]);
			
 
				-
			
 
				-    kfree(ec);
			
 
				-    return NULL;
			
 
				+	struct oslec_state *ec;
			
 
				+	int i;
			
 
				+
			
 
				+	ec = kzalloc(sizeof(*ec), GFP_KERNEL);
			
 
				+	if (!ec)
			
 
				+		return NULL;
			
 
				+
			
 
				+	ec->taps = len;
			
 
				+	ec->log2taps = top_bit(len);
			
 
				+	ec->curr_pos = ec->taps - 1;
			
 
				+
			
 
				+	for (i = 0; i < 2; i++) {
			
 
				+		ec->fir_taps16[i] =
			
 
				+		    kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
			
 
				+		if (!ec->fir_taps16[i])
			
 
				+			goto error_oom;
			
 
				+	}
			
 
				+
			
 
				+	fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps);
			
 
				+	fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps);
			
 
				+
			
 
				+	for (i = 0; i < 5; i++) {
			
 
				+		ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0;
			
 
				+	}
			
 
				+
			
 
				+	ec->cng_level = 1000;
			
 
				+	oslec_adaption_mode(ec, adaption_mode);
			
 
				+
			
 
				+	ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
			
 
				+	if (!ec->snapshot)
			
 
				+		goto error_oom;
			
 
				+
			
 
				+	ec->cond_met = 0;
			
 
				+	ec->Pstates = 0;
			
 
				+	ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
			
 
				+	ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
			
 
				+	ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
			
 
				+	ec->Lbgn = ec->Lbgn_acc = 0;
			
 
				+	ec->Lbgn_upper = 200;
			
 
				+	ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
			
 
				+
			
 
				+	return ec;
			
 
				+
			
 
				+      error_oom:
			
 
				+	for (i = 0; i < 2; i++)
			
 
				+		kfree(ec->fir_taps16[i]);
			
 
				+
			
 
				+	kfree(ec);
			
 
				+	return NULL;
			
 
				 }
			
 
				+
			
 
				 EXPORT_SYMBOL_GPL(oslec_create);
			
 
				 
			
 
				 void oslec_free(struct oslec_state *ec)
			
@@ -294,293 +288,300 @@ void oslec_free(struct oslec_state *ec)
 
				 
			
 
				 	fir16_free(&ec->fir_state);
			
 
				 	fir16_free(&ec->fir_state_bg);
			
 
				-	for (i = 0;  i < 2;  i++)
			
 
				+	for (i = 0; i < 2; i++)
			
 
				 		kfree(ec->fir_taps16[i]);
			
 
				 	kfree(ec->snapshot);
			
 
				 	kfree(ec);
			
 
				 }
			
 
				+
			
 
				 EXPORT_SYMBOL_GPL(oslec_free);
			
 
				 
			
 
				 void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode)
			
 
				 {
			
 
				-    ec->adaption_mode = adaption_mode;
			
 
				+	ec->adaption_mode = adaption_mode;
			
 
				 }
			
 
				+
			
 
				 EXPORT_SYMBOL_GPL(oslec_adaption_mode);
			
 
				 
			
 
				 void oslec_flush(struct oslec_state *ec)
			
 
				 {
			
 
				-    int i;
			
 
				+	int i;
			
 
				 
			
 
				-    ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
			
 
				-    ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
			
 
				-    ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
			
 
				+	ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
			
 
				+	ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
			
 
				+	ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
			
 
				 
			
 
				-    ec->Lbgn = ec->Lbgn_acc = 0;
			
 
				-    ec->Lbgn_upper = 200;
			
 
				-    ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
			
 
				+	ec->Lbgn = ec->Lbgn_acc = 0;
			
 
				+	ec->Lbgn_upper = 200;
			
 
				+	ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
			
 
				 
			
 
				-    ec->nonupdate_dwell = 0;
			
 
				+	ec->nonupdate_dwell = 0;
			
 
				 
			
 
				-    fir16_flush(&ec->fir_state);
			
 
				-    fir16_flush(&ec->fir_state_bg);
			
 
				-    ec->fir_state.curr_pos = ec->taps - 1;
			
 
				-    ec->fir_state_bg.curr_pos = ec->taps - 1;
			
 
				-    for (i = 0;  i < 2;  i++)
			
 
				-        memset(ec->fir_taps16[i], 0, ec->taps*sizeof(int16_t));
			
 
				+	fir16_flush(&ec->fir_state);
			
 
				+	fir16_flush(&ec->fir_state_bg);
			
 
				+	ec->fir_state.curr_pos = ec->taps - 1;
			
 
				+	ec->fir_state_bg.curr_pos = ec->taps - 1;
			
 
				+	for (i = 0; i < 2; i++)
			
 
				+		memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t));
			
 
				 
			
 
				-    ec->curr_pos = ec->taps - 1;
			
 
				-    ec->Pstates = 0;
			
 
				+	ec->curr_pos = ec->taps - 1;
			
 
				+	ec->Pstates = 0;
			
 
				 }
			
 
				+
			
 
				 EXPORT_SYMBOL_GPL(oslec_flush);
			
 
				 
			
 
				-void oslec_snapshot(struct oslec_state *ec) {
			
 
				-    memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps*sizeof(int16_t));
			
 
				+void oslec_snapshot(struct oslec_state *ec)
			
 
				+{
			
 
				+	memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t));
			
 
				 }
			
 
				+
			
 
				 EXPORT_SYMBOL_GPL(oslec_snapshot);
			
 
				 
			
 
				 /* Dual Path Echo Canceller ------------------------------------------------*/
			
 
				 
			
 
				 int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
			
 
				 {
			
 
				-    int32_t echo_value;
			
 
				-    int clean_bg;
			
 
				-    int tmp, tmp1;
			
 
				-
			
 
				-    /* Input scaling was found be required to prevent problems when tx
			
 
				-       starts clipping.  Another possible way to handle this would be the
			
 
				-       filter coefficent scaling. */
			
 
				-
			
 
				-    ec->tx = tx; ec->rx = rx;
			
 
				-    tx >>=1;
			
 
				-    rx >>=1;
			
 
				-
			
 
				-    /*
			
 
				-       Filter DC, 3dB point is 160Hz (I think), note 32 bit precision required
			
 
				-       otherwise values do not track down to 0. Zero at DC, Pole at (1-Beta)
			
 
				-       only real axis.  Some chip sets (like Si labs) don't need
			
 
				-       this, but something like a $10 X100P card does.  Any DC really slows
			
 
				-       down convergence.
			
 
				-
			
 
				-       Note: removes some low frequency from the signal, this reduces
			
 
				-       the speech quality when listening to samples through headphones
			
 
				-       but may not be obvious through a telephone handset.
			
 
				-
			
 
				-       Note that the 3dB frequency in radians is approx Beta, e.g. for
			
 
				-       Beta = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
			
 
				-    */
			
 
				-
			
 
				-    if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
			
 
				-      tmp = rx << 15;
			
 
				+	int32_t echo_value;
			
 
				+	int clean_bg;
			
 
				+	int tmp, tmp1;
			
 
				+
			
 
				+	/* Input scaling was found to be required to prevent problems when tx
			
 
				+	   starts clipping.  Another possible way to handle this would be the
			
 
				+	   filter coefficient scaling. */
			
 
				+
			
 
				+	ec->tx = tx;
			
 
				+	ec->rx = rx;
			
 
				+	tx >>= 1;
			
 
				+	rx >>= 1;
			
 
				+
			
 
				+	/*
			
 
				+	   Filter DC, 3dB point is 160Hz (I think), note 32 bit precision required
			
 
				+	   otherwise values do not track down to 0. Zero at DC, Pole at (1-Beta)
			
 
				+	   only real axis.  Some chip sets (like Si labs) don't need
			
 
				+	   this, but something like a $10 X100P card does.  Any DC really slows
			
 
				+	   down convergence.
			
 
				+
			
 
				+	   Note: removes some low frequency from the signal, this reduces
			
 
				+	   the speech quality when listening to samples through headphones
			
 
				+	   but may not be obvious through a telephone handset.
			
 
				+
			
 
				+	   Note that the 3dB frequency in radians is approx Beta, e.g. for
			
 
				+	   Beta = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
			
 
				+	 */
			
 
				+
			
 
				+	if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
			
 
				+		tmp = rx << 15;
			
 
				 #if 1
			
 
				-        /* Make sure the gain of the HPF is 1.0. This can still saturate a little under
			
 
				-           impulse conditions, and it might roll to 32768 and need clipping on sustained peak
			
 
				-           level signals. However, the scale of such clipping is small, and the error due to
			
 
				-           any saturation should not markedly affect the downstream processing. */
			
 
				-        tmp -= (tmp >> 4);
			
 
				+		/* Make sure the gain of the HPF is 1.0. This can still saturate a little under
			
 
				+		   impulse conditions, and it might roll to 32768 and need clipping on sustained peak
			
 
				+		   level signals. However, the scale of such clipping is small, and the error due to
			
 
				+		   any saturation should not markedly affect the downstream processing. */
			
 
				+		tmp -= (tmp >> 4);
			
 
				 #endif
			
 
				-      ec->rx_1 += -(ec->rx_1>>DC_LOG2BETA) + tmp - ec->rx_2;
			
 
				+		ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2;
			
 
				+
			
 
				+		/* hard limit filter to prevent clipping.  Note that at this stage
			
 
				+		   rx should be limited to +/- 16383 due to right shift above */
			
 
				+		tmp1 = ec->rx_1 >> 15;
			
 
				+		if (tmp1 > 16383)
			
 
				+			tmp1 = 16383;
			
 
				+		if (tmp1 < -16383)
			
 
				+			tmp1 = -16383;
			
 
				+		rx = tmp1;
			
 
				+		ec->rx_2 = tmp;
			
 
				+	}
			
 
				 
			
 
				-      /* hard limit filter to prevent clipping.  Note that at this stage
			
 
				-	 rx should be limited to +/- 16383 due to right shift above */
			
 
				-      tmp1 = ec->rx_1 >> 15;
			
 
				-      if (tmp1 > 16383) tmp1 = 16383;
			
 
				-      if (tmp1 < -16383) tmp1 = -16383;
			
 
				-      rx = tmp1;
			
 
				-      ec->rx_2 = tmp;
			
 
				-    }
			
 
				+	/* Block average of power in the filter states.  Used for
			
 
				+	   adaption power calculation. */
			
 
				 
			
 
				-    /* Block average of power in the filter states.  Used for
			
 
				-       adaption power calculation. */
			
 
				+	{
			
 
				+		int new, old;
			
 
				+
			
 
				+		/* efficient "out with the old and in with the new" algorithm so
			
 
				+		   we don't have to recalculate over the whole block of
			
 
				+		   samples. */
			
 
				+		new = (int)tx *(int)tx;
			
 
				+		old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
			
 
				+		    (int)ec->fir_state.history[ec->fir_state.curr_pos];
			
 
				+		ec->Pstates +=
			
 
				+		    ((new - old) + (1 << ec->log2taps)) >> ec->log2taps;
			
 
				+		if (ec->Pstates < 0)
			
 
				+			ec->Pstates = 0;
			
 
				+	}
			
 
				 
			
 
				-    {
			
 
				-	int new, old;
			
 
				+	/* Calculate short term average levels using simple single pole IIRs */
			
 
				 
			
 
				-	/* efficient "out with the old and in with the new" algorithm so
			
 
				-	   we don't have to recalculate over the whole block of
			
 
				-	   samples. */
			
 
				-	new = (int)tx * (int)tx;
			
 
				-	old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
			
 
				-              (int)ec->fir_state.history[ec->fir_state.curr_pos];
			
 
				-	ec->Pstates += ((new - old) + (1<<ec->log2taps)) >> ec->log2taps;
			
 
				-	if (ec->Pstates < 0) ec->Pstates = 0;
			
 
				-    }
			
 
				-
			
 
				-    /* Calculate short term average levels using simple single pole IIRs */
			
 
				-
			
 
				-    ec->Ltxacc += abs(tx) - ec->Ltx;
			
 
				-    ec->Ltx = (ec->Ltxacc + (1<<4)) >> 5;
			
 
				-    ec->Lrxacc += abs(rx) - ec->Lrx;
			
 
				-    ec->Lrx = (ec->Lrxacc + (1<<4)) >> 5;
			
 
				-
			
 
				-    /* Foreground filter ---------------------------------------------------*/
			
 
				-
			
 
				-    ec->fir_state.coeffs = ec->fir_taps16[0];
			
 
				-    echo_value = fir16(&ec->fir_state, tx);
			
 
				-    ec->clean = rx - echo_value;
			
 
				-    ec->Lcleanacc += abs(ec->clean) - ec->Lclean;
			
 
				-    ec->Lclean = (ec->Lcleanacc + (1<<4)) >> 5;
			
 
				-
			
 
				-    /* Background filter ---------------------------------------------------*/
			
 
				-
			
 
				-    echo_value = fir16(&ec->fir_state_bg, tx);
			
 
				-    clean_bg = rx - echo_value;
			
 
				-    ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg;
			
 
				-    ec->Lclean_bg = (ec->Lclean_bgacc + (1<<4)) >> 5;
			
 
				-
			
 
				-    /* Background Filter adaption -----------------------------------------*/
			
 
				-
			
 
				-    /* Almost always adap bg filter, just simple DT and energy
			
 
				-       detection to minimise adaption in cases of strong double talk.
			
 
				-       However this is not critical for the dual path algorithm.
			
 
				-    */
			
 
				-    ec->factor = 0;
			
 
				-    ec->shift = 0;
			
 
				-    if ((ec->nonupdate_dwell == 0)) {
			
 
				-	int   P, logP, shift;
			
 
				-
			
 
				-	/* Determine:
			
 
				-
			
 
				-	   f = Beta * clean_bg_rx/P ------ (1)
			
 
				-
			
 
				-	   where P is the total power in the filter states.
			
 
				-
			
 
				-	   The Boffins have shown that if we obey (1) we converge
			
 
				-	   quickly and avoid instability.
			
 
				-
			
 
				-	   The correct factor f must be in Q30, as this is the fixed
			
 
				-	   point format required by the lms_adapt_bg() function,
			
 
				-	   therefore the scaled version of (1) is:
			
 
				-
			
 
				-	   (2^30) * f  = (2^30) * Beta * clean_bg_rx/P
			
 
				-	       factor  = (2^30) * Beta * clean_bg_rx/P         ----- (2)
			
 
				-
			
 
				-	   We have chosen Beta = 0.25 by experiment, so:
			
 
				-
			
 
				-	       factor  = (2^30) * (2^-2) * clean_bg_rx/P
			
 
				-
			
 
				-                                       (30 - 2 - log2(P))
			
 
				-	       factor  = clean_bg_rx 2                         ----- (3)
			
 
				-
			
 
				-	   To avoid a divide we approximate log2(P) as top_bit(P),
			
 
				-	   which returns the position of the highest non-zero bit in
			
 
				-	   P.  This approximation introduces an error as large as a
			
 
				-	   factor of 2, but the algorithm seems to handle it OK.
			
 
				-
			
 
				-	   Come to think of it a divide may not be a big deal on a
			
 
				-	   modern DSP, so its probably worth checking out the cycles
			
 
				-	   for a divide versus a top_bit() implementation.
			
 
				-	*/
			
 
				-
			
 
				-	P = MIN_TX_POWER_FOR_ADAPTION + ec->Pstates;
			
 
				-	logP = top_bit(P) + ec->log2taps;
			
 
				-	shift = 30 - 2 - logP;
			
 
				-	ec->shift = shift;
			
 
				-
			
 
				-	lms_adapt_bg(ec, clean_bg, shift);
			
 
				-    }
			
 
				-
			
 
				-    /* very simple DTD to make sure we dont try and adapt with strong
			
 
				-       near end speech */
			
 
				-
			
 
				-    ec->adapt = 0;
			
 
				-    if ((ec->Lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->Lrx > ec->Ltx))
			
 
				-	ec->nonupdate_dwell = DTD_HANGOVER;
			
 
				-    if (ec->nonupdate_dwell)
			
 
				-	ec->nonupdate_dwell--;
			
 
				+	ec->Ltxacc += abs(tx) - ec->Ltx;
			
 
				+	ec->Ltx = (ec->Ltxacc + (1 << 4)) >> 5;
			
 
				+	ec->Lrxacc += abs(rx) - ec->Lrx;
			
 
				+	ec->Lrx = (ec->Lrxacc + (1 << 4)) >> 5;
			
 
				 
			
 
				-    /* Transfer logic ------------------------------------------------------*/
			
 
				+	/* Foreground filter --------------------------------------------------- */
			
 
				 
			
 
				-    /* These conditions are from the dual path paper [1], I messed with
			
 
				-       them a bit to improve performance. */
			
 
				+	ec->fir_state.coeffs = ec->fir_taps16[0];
			
 
				+	echo_value = fir16(&ec->fir_state, tx);
			
 
				+	ec->clean = rx - echo_value;
			
 
				+	ec->Lcleanacc += abs(ec->clean) - ec->Lclean;
			
 
				+	ec->Lclean = (ec->Lcleanacc + (1 << 4)) >> 5;
			
 
				 
			
 
				-    if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
			
 
				-	(ec->nonupdate_dwell == 0) &&
			
 
				-	(8*ec->Lclean_bg < 7*ec->Lclean) /* (ec->Lclean_bg < 0.875*ec->Lclean) */ &&
			
 
				-	(8*ec->Lclean_bg < ec->Ltx)      /* (ec->Lclean_bg < 0.125*ec->Ltx)    */ )
			
 
				-    {
			
 
				-	if (ec->cond_met == 6) {
			
 
				-	    /* BG filter has had better results for 6 consecutive samples */
			
 
				-	    ec->adapt = 1;
			
 
				-	    memcpy(ec->fir_taps16[0], ec->fir_taps16[1], ec->taps*sizeof(int16_t));
			
 
				-	}
			
 
				-	else
			
 
				-	    ec->cond_met++;
			
 
				-    }
			
 
				-    else
			
 
				-	ec->cond_met = 0;
			
 
				+	/* Background filter --------------------------------------------------- */
			
 
				 
			
 
				-    /* Non-Linear Processing ---------------------------------------------------*/
			
 
				+	echo_value = fir16(&ec->fir_state_bg, tx);
			
 
				+	clean_bg = rx - echo_value;
			
 
				+	ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg;
			
 
				+	ec->Lclean_bg = (ec->Lclean_bgacc + (1 << 4)) >> 5;
			
 
				 
			
 
				-    ec->clean_nlp = ec->clean;
			
 
				-    if (ec->adaption_mode & ECHO_CAN_USE_NLP)
			
 
				-    {
			
 
				-        /* Non-linear processor - a fancy way to say "zap small signals, to avoid
			
 
				-           residual echo due to (uLaw/ALaw) non-linearity in the channel.". */
			
 
				+	/* Background Filter adaption ----------------------------------------- */
			
 
				 
			
 
				-      if ((16*ec->Lclean < ec->Ltx))
			
 
				-      {
			
 
				-	/* Our e/c has improved echo by at least 24 dB (each factor of 2 is 6dB,
			
 
				-	   so 2*2*2*2=16 is the same as 6+6+6+6=24dB) */
			
 
				-        if (ec->adaption_mode & ECHO_CAN_USE_CNG)
			
 
				-	{
			
 
				-	    ec->cng_level = ec->Lbgn;
			
 
				-
			
 
				-	    /* Very elementary comfort noise generation.  Just random
			
 
				-	       numbers rolled off very vaguely Hoth-like.  DR: This
			
 
				-	       noise doesn't sound quite right to me - I suspect there
			
 
				-	       are some overlfow issues in the filtering as it's too
			
 
				-	       "crackly".  TODO: debug this, maybe just play noise at
			
 
				-	       high level or look at spectrum.
			
 
				-	    */
			
 
				-
			
 
				-	    ec->cng_rndnum = 1664525U*ec->cng_rndnum + 1013904223U;
			
 
				-	    ec->cng_filter = ((ec->cng_rndnum & 0xFFFF) - 32768 + 5*ec->cng_filter) >> 3;
			
 
				-	    ec->clean_nlp = (ec->cng_filter*ec->cng_level*8) >> 14;
			
 
				-
			
 
				-        }
			
 
				-        else if (ec->adaption_mode & ECHO_CAN_USE_CLIP)
			
 
				-	{
			
 
				-	    /* This sounds much better than CNG */
			
 
				-	    if (ec->clean_nlp > ec->Lbgn)
			
 
				-	      ec->clean_nlp = ec->Lbgn;
			
 
				-	    if (ec->clean_nlp < -ec->Lbgn)
			
 
				-	      ec->clean_nlp = -ec->Lbgn;
			
 
				+	/* Almost always adap bg filter, just simple DT and energy
			
 
				+	   detection to minimise adaption in cases of strong double talk.
			
 
				+	   However this is not critical for the dual path algorithm.
			
 
				+	 */
			
 
				+	ec->factor = 0;
			
 
				+	ec->shift = 0;
			
 
				+	if ((ec->nonupdate_dwell == 0)) {
			
 
				+		int P, logP, shift;
			
 
				+
			
 
				+		/* Determine:
			
 
				+
			
 
				+		   f = Beta * clean_bg_rx/P ------ (1)
			
 
				+
			
 
				+		   where P is the total power in the filter states.
			
 
				+
			
 
				+		   The Boffins have shown that if we obey (1) we converge
			
 
				+		   quickly and avoid instability.
			
 
				+
			
 
				+		   The correct factor f must be in Q30, as this is the fixed
			
 
				+		   point format required by the lms_adapt_bg() function,
			
 
				+		   therefore the scaled version of (1) is:
			
 
				+
			
 
				+		   (2^30) * f  = (2^30) * Beta * clean_bg_rx/P
			
 
				+		   factor  = (2^30) * Beta * clean_bg_rx/P         ----- (2)
			
 
				+
			
 
				+		   We have chosen Beta = 0.25 by experiment, so:
			
 
				+
			
 
				+		   factor  = (2^30) * (2^-2) * clean_bg_rx/P
			
 
				+
			
 
				+		   (30 - 2 - log2(P))
			
 
				+		   factor  = clean_bg_rx 2                         ----- (3)
			
 
				+
			
 
				+		   To avoid a divide we approximate log2(P) as top_bit(P),
			
 
				+		   which returns the position of the highest non-zero bit in
			
 
				+		   P.  This approximation introduces an error as large as a
			
 
				+		   factor of 2, but the algorithm seems to handle it OK.
			
 
				+
			
 
				+		   Come to think of it a divide may not be a big deal on a
			
 
				+		   modern DSP, so its probably worth checking out the cycles
			
 
				+		   for a divide versus a top_bit() implementation.
			
 
				+		 */
			
 
				+
			
 
				+		P = MIN_TX_POWER_FOR_ADAPTION + ec->Pstates;
			
 
				+		logP = top_bit(P) + ec->log2taps;
			
 
				+		shift = 30 - 2 - logP;
			
 
				+		ec->shift = shift;
			
 
				+
			
 
				+		lms_adapt_bg(ec, clean_bg, shift);
			
 
				 	}
			
 
				-	else
			
 
				-        {
			
 
				-	  /* just mute the residual, doesn't sound very good, used mainly
			
 
				-	     in G168 tests */
			
 
				-          ec->clean_nlp = 0;
			
 
				-        }
			
 
				-      }
			
 
				-      else {
			
 
				-	  /* Background noise estimator.  I tried a few algorithms
			
 
				-	     here without much luck.  This very simple one seems to
			
 
				-	     work best, we just average the level using a slow (1 sec
			
 
				-	     time const) filter if the current level is less than a
			
 
				-	     (experimentally derived) constant.  This means we dont
			
 
				-	     include high level signals like near end speech.  When
			
 
				-	     combined with CNG or especially CLIP seems to work OK.
			
 
				-	  */
			
 
				-	  if (ec->Lclean < 40) {
			
 
				-	      ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn;
			
 
				-	      ec->Lbgn = (ec->Lbgn_acc + (1<<11)) >> 12;
			
 
				-	  }
			
 
				-       }
			
 
				-    }
			
 
				-
			
 
				-    /* Roll around the taps buffer */
			
 
				-    if (ec->curr_pos <= 0)
			
 
				-        ec->curr_pos = ec->taps;
			
 
				-    ec->curr_pos--;
			
 
				-
			
 
				-    if (ec->adaption_mode & ECHO_CAN_DISABLE)
			
 
				-      ec->clean_nlp = rx;
			
 
				-
			
 
				-    /* Output scaled back up again to match input scaling */
			
 
				-
			
 
				-    return (int16_t) ec->clean_nlp << 1;
			
 
				+
			
 
				+	/* very simple DTD to make sure we dont try and adapt with strong
			
 
				+	   near end speech */
			
 
				+
			
 
				+	ec->adapt = 0;
			
 
				+	if ((ec->Lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->Lrx > ec->Ltx))
			
 
				+		ec->nonupdate_dwell = DTD_HANGOVER;
			
 
				+	if (ec->nonupdate_dwell)
			
 
				+		ec->nonupdate_dwell--;
			
 
				+
			
 
				+	/* Transfer logic ------------------------------------------------------ */
			
 
				+
			
 
				+	/* These conditions are from the dual path paper [1], I messed with
			
 
				+	   them a bit to improve performance. */
			
 
				+
			
 
				+	if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
			
 
				+	    (ec->nonupdate_dwell == 0) &&
			
 
				+	    (8 * ec->Lclean_bg <
			
 
				+	     7 * ec->Lclean) /* (ec->Lclean_bg < 0.875*ec->Lclean) */ &&
			
 
				+	    (8 * ec->Lclean_bg <
			
 
				+	     ec->Ltx) /* (ec->Lclean_bg < 0.125*ec->Ltx)    */ ) {
			
 
				+		if (ec->cond_met == 6) {
			
 
				+			/* BG filter has had better results for 6 consecutive samples */
			
 
				+			ec->adapt = 1;
			
 
				+			memcpy(ec->fir_taps16[0], ec->fir_taps16[1],
			
 
				+			       ec->taps * sizeof(int16_t));
			
 
				+		} else
			
 
				+			ec->cond_met++;
			
 
				+	} else
			
 
				+		ec->cond_met = 0;
			
 
				+
			
 
				+	/* Non-Linear Processing --------------------------------------------------- */
			
 
				+
			
 
				+	ec->clean_nlp = ec->clean;
			
 
				+	if (ec->adaption_mode & ECHO_CAN_USE_NLP) {
			
 
				+		/* Non-linear processor - a fancy way to say "zap small signals, to avoid
			
 
				+		   residual echo due to (uLaw/ALaw) non-linearity in the channel.". */
			
 
				+
			
 
				+		if ((16 * ec->Lclean < ec->Ltx)) {
			
 
				+			/* Our e/c has improved echo by at least 24 dB (each factor of 2 is 6dB,
			
 
				+			   so 2*2*2*2=16 is the same as 6+6+6+6=24dB) */
			
 
				+			if (ec->adaption_mode & ECHO_CAN_USE_CNG) {
			
 
				+				ec->cng_level = ec->Lbgn;
			
 
				+
			
 
				+				/* Very elementary comfort noise generation.  Just random
			
 
				+				   numbers rolled off very vaguely Hoth-like.  DR: This
			
 
				+				   noise doesn't sound quite right to me - I suspect there
			
 
				+				   are some overlfow issues in the filtering as it's too
			
 
				+				   "crackly".  TODO: debug this, maybe just play noise at
			
 
				+				   high level or look at spectrum.
			
 
				+				 */
			
 
				+
			
 
				+				ec->cng_rndnum =
			
 
				+				    1664525U * ec->cng_rndnum + 1013904223U;
			
 
				+				ec->cng_filter =
			
 
				+				    ((ec->cng_rndnum & 0xFFFF) - 32768 +
			
 
				+				     5 * ec->cng_filter) >> 3;
			
 
				+				ec->clean_nlp =
			
 
				+				    (ec->cng_filter * ec->cng_level * 8) >> 14;
			
 
				+
			
 
				+			} else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) {
			
 
				+				/* This sounds much better than CNG */
			
 
				+				if (ec->clean_nlp > ec->Lbgn)
			
 
				+					ec->clean_nlp = ec->Lbgn;
			
 
				+				if (ec->clean_nlp < -ec->Lbgn)
			
 
				+					ec->clean_nlp = -ec->Lbgn;
			
 
				+			} else {
			
 
				+				/* just mute the residual, doesn't sound very good, used mainly
			
 
				+				   in G168 tests */
			
 
				+				ec->clean_nlp = 0;
			
 
				+			}
			
 
				+		} else {
			
 
				+			/* Background noise estimator.  I tried a few algorithms
			
 
				+			   here without much luck.  This very simple one seems to
			
 
				+			   work best, we just average the level using a slow (1 sec
			
 
				+			   time const) filter if the current level is less than a
			
 
				+			   (experimentally derived) constant.  This means we dont
			
 
				+			   include high level signals like near end speech.  When
			
 
				+			   combined with CNG or especially CLIP seems to work OK.
			
 
				+			 */
			
 
				+			if (ec->Lclean < 40) {
			
 
				+				ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn;
			
 
				+				ec->Lbgn = (ec->Lbgn_acc + (1 << 11)) >> 12;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	/* Roll around the taps buffer */
			
 
				+	if (ec->curr_pos <= 0)
			
 
				+		ec->curr_pos = ec->taps;
			
 
				+	ec->curr_pos--;
			
 
				+
			
 
				+	if (ec->adaption_mode & ECHO_CAN_DISABLE)
			
 
				+		ec->clean_nlp = rx;
			
 
				+
			
 
				+	/* Output scaled back up again to match input scaling */
			
 
				+
			
 
				+	return (int16_t) ec->clean_nlp << 1;
			
 
				 }
			
 
				+
			
 
				 EXPORT_SYMBOL_GPL(oslec_update);
			
 
				 
			
 
				 /* This function is seperated from the echo canceller is it is usually called
			
@@ -604,28 +605,32 @@ EXPORT_SYMBOL_GPL(oslec_update);
 
				    precision, which noise shapes things, giving very clean DC removal.
			
 
				 */
			
 
				 
			
 
				-int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx) {
			
 
				-    int tmp, tmp1;
			
 
				+int16_t oslec_hpf_tx(struct oslec_state * ec, int16_t tx)
			
 
				+{
			
 
				+	int tmp, tmp1;
			
 
				 
			
 
				-    if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
			
 
				-        tmp = tx << 15;
			
 
				+	if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
			
 
				+		tmp = tx << 15;
			
 
				 #if 1
			
 
				-        /* Make sure the gain of the HPF is 1.0. The first can still saturate a little under
			
 
				-           impulse conditions, and it might roll to 32768 and need clipping on sustained peak
			
 
				-           level signals. However, the scale of such clipping is small, and the error due to
			
 
				-           any saturation should not markedly affect the downstream processing. */
			
 
				-        tmp -= (tmp >> 4);
			
 
				+		/* Make sure the gain of the HPF is 1.0. The first can still saturate a little under
			
 
				+		   impulse conditions, and it might roll to 32768 and need clipping on sustained peak
			
 
				+		   level signals. However, the scale of such clipping is small, and the error due to
			
 
				+		   any saturation should not markedly affect the downstream processing. */
			
 
				+		tmp -= (tmp >> 4);
			
 
				 #endif
			
 
				-        ec->tx_1 += -(ec->tx_1>>DC_LOG2BETA) + tmp - ec->tx_2;
			
 
				-        tmp1 = ec->tx_1 >> 15;
			
 
				-	if (tmp1 > 32767) tmp1 = 32767;
			
 
				-	if (tmp1 < -32767) tmp1 = -32767;
			
 
				-	tx = tmp1;
			
 
				-        ec->tx_2 = tmp;
			
 
				-    }
			
 
				-
			
 
				-    return tx;
			
 
				+		ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2;
			
 
				+		tmp1 = ec->tx_1 >> 15;
			
 
				+		if (tmp1 > 32767)
			
 
				+			tmp1 = 32767;
			
 
				+		if (tmp1 < -32767)
			
 
				+			tmp1 = -32767;
			
 
				+		tx = tmp1;
			
 
				+		ec->tx_2 = tmp;
			
 
				+	}
			
 
				+
			
 
				+	return tx;
			
 
				 }
			
 
				+
			
 
				 EXPORT_SYMBOL_GPL(oslec_hpf_tx);
			
 
				 
			
 
				 MODULE_LICENSE("GPL");
			
--- a/drivers/staging/echo/echo.h
+++ b/drivers/staging/echo/echo.h
@@ -124,9 +124,8 @@ a minor burden.
 
				     G.168 echo canceller descriptor. This defines the working state for a line
			
 
				     echo canceller.
			
 
				 */
			
 
				-struct oslec_state
			
 
				-{
			
 
				-	int16_t tx,rx;
			
 
				+struct oslec_state {
			
 
				+	int16_t tx, rx;
			
 
				 	int16_t clean;
			
 
				 	int16_t clean_nlp;
			
 
				 
			
@@ -170,4 +169,4 @@ struct oslec_state
 
				 	int16_t *snapshot;
			
 
				 };
			
 
				 
			
 
				-#endif	/* __ECHO_H */
			
 
				+#endif /* __ECHO_H */
			
--- a/drivers/staging/echo/fir.h
+++ b/drivers/staging/echo/fir.h
@@ -72,8 +72,7 @@
 
				     16 bit integer FIR descriptor. This defines the working state for a single
			
 
				     instance of an FIR filter using 16 bit integer coefficients.
			
 
				 */
			
 
				-typedef struct
			
 
				-{
			
 
				+typedef struct {
			
 
				 	int taps;
			
 
				 	int curr_pos;
			
 
				 	const int16_t *coeffs;
			
@@ -85,8 +84,7 @@ typedef struct
 
				     instance of an FIR filter using 32 bit integer coefficients, and filtering
			
 
				     16 bit integer data.
			
 
				 */
			
 
				-typedef struct
			
 
				-{
			
 
				+typedef struct {
			
 
				 	int taps;
			
 
				 	int curr_pos;
			
 
				 	const int32_t *coeffs;
			
@@ -97,39 +95,37 @@ typedef struct
 
				     Floating point FIR descriptor. This defines the working state for a single
			
 
				     instance of an FIR filter using floating point coefficients and data.
			
 
				 */
			
 
				-typedef struct
			
 
				-{
			
 
				+typedef struct {
			
 
				 	int taps;
			
 
				 	int curr_pos;
			
 
				 	const float *coeffs;
			
 
				 	float *history;
			
 
				 } fir_float_state_t;
			
 
				 
			
 
				-static __inline__ const int16_t *fir16_create(fir16_state_t *fir,
			
 
				-                                              const int16_t *coeffs,
			
 
				-                                              int taps)
			
 
				+static __inline__ const int16_t *fir16_create(fir16_state_t * fir,
			
 
				+					      const int16_t * coeffs, int taps)
			
 
				 {
			
 
				 	fir->taps = taps;
			
 
				 	fir->curr_pos = taps - 1;
			
 
				 	fir->coeffs = coeffs;
			
 
				 #if defined(USE_MMX)  ||  defined(USE_SSE2) || defined(__bfin__)
			
 
				-	fir->history = kcalloc(2*taps, sizeof(int16_t), GFP_KERNEL);
			
 
				+	fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL);
			
 
				 #else
			
 
				 	fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
			
 
				 #endif
			
 
				 	return fir->history;
			
 
				 }
			
 
				 
			
 
				-static __inline__ void fir16_flush(fir16_state_t *fir)
			
 
				+static __inline__ void fir16_flush(fir16_state_t * fir)
			
 
				 {
			
 
				 #if defined(USE_MMX)  ||  defined(USE_SSE2) || defined(__bfin__)
			
 
				-    memset(fir->history, 0, 2*fir->taps*sizeof(int16_t));
			
 
				+	memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t));
			
 
				 #else
			
 
				-    memset(fir->history, 0, fir->taps*sizeof(int16_t));
			
 
				+	memset(fir->history, 0, fir->taps * sizeof(int16_t));
			
 
				 #endif
			
 
				 }
			
 
				 
			
 
				-static __inline__ void fir16_free(fir16_state_t *fir)
			
 
				+static __inline__ void fir16_free(fir16_state_t * fir)
			
 
				 {
			
 
				 	kfree(fir->history);
			
 
				 }
			
@@ -137,166 +133,162 @@ static __inline__ void fir16_free(fir16_state_t *fir)
 
				 #ifdef __bfin__
			
 
				 static inline int32_t dot_asm(short *x, short *y, int len)
			
 
				 {
			
 
				-   int dot;
			
 
				-
			
 
				-   len--;
			
 
				-
			
 
				-   __asm__
			
 
				-   (
			
 
				-   "I0 = %1;\n\t"
			
 
				-   "I1 = %2;\n\t"
			
 
				-   "A0 = 0;\n\t"
			
 
				-   "R0.L = W[I0++] || R1.L = W[I1++];\n\t"
			
 
				-   "LOOP dot%= LC0 = %3;\n\t"
			
 
				-   "LOOP_BEGIN dot%=;\n\t"
			
 
				-      "A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t"
			
 
				-   "LOOP_END dot%=;\n\t"
			
 
				-   "A0 += R0.L*R1.L (IS);\n\t"
			
 
				-   "R0 = A0;\n\t"
			
 
				-   "%0 = R0;\n\t"
			
 
				-   : "=&d" (dot)
			
 
				-   : "a" (x), "a" (y), "a" (len)
			
 
				-   : "I0", "I1", "A1", "A0", "R0", "R1"
			
 
				-   );
			
 
				-
			
 
				-   return dot;
			
 
				+	int dot;
			
 
				+
			
 
				+	len--;
			
 
				+
			
 
				+	__asm__("I0 = %1;\n\t"
			
 
				+		"I1 = %2;\n\t"
			
 
				+		"A0 = 0;\n\t"
			
 
				+		"R0.L = W[I0++] || R1.L = W[I1++];\n\t"
			
 
				+		"LOOP dot%= LC0 = %3;\n\t"
			
 
				+		"LOOP_BEGIN dot%=;\n\t"
			
 
				+		"A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t"
			
 
				+		"LOOP_END dot%=;\n\t"
			
 
				+		"A0 += R0.L*R1.L (IS);\n\t"
			
 
				+		"R0 = A0;\n\t"
			
 
				+		"%0 = R0;\n\t"
			
 
				+		:"=&d"(dot)
			
 
				+		:"a"(x), "a"(y), "a"(len)
			
 
				+		:"I0", "I1", "A1", "A0", "R0", "R1"
			
 
				+	);
			
 
				+
			
 
				+	return dot;
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-static __inline__ int16_t fir16(fir16_state_t *fir, int16_t sample)
			
 
				+static __inline__ int16_t fir16(fir16_state_t * fir, int16_t sample)
			
 
				 {
			
 
				-    int32_t y;
			
 
				+	int32_t y;
			
 
				 #if defined(USE_MMX)
			
 
				-    int i;
			
 
				-    mmx_t *mmx_coeffs;
			
 
				-    mmx_t *mmx_hist;
			
 
				-
			
 
				-    fir->history[fir->curr_pos] = sample;
			
 
				-    fir->history[fir->curr_pos + fir->taps] = sample;
			
 
				-
			
 
				-    mmx_coeffs = (mmx_t *) fir->coeffs;
			
 
				-    mmx_hist = (mmx_t *) &fir->history[fir->curr_pos];
			
 
				-    i = fir->taps;
			
 
				-    pxor_r2r(mm4, mm4);
			
 
				-    /* 8 samples per iteration, so the filter must be a multiple of 8 long. */
			
 
				-    while (i > 0)
			
 
				-    {
			
 
				-        movq_m2r(mmx_coeffs[0], mm0);
			
 
				-        movq_m2r(mmx_coeffs[1], mm2);
			
 
				-        movq_m2r(mmx_hist[0], mm1);
			
 
				-        movq_m2r(mmx_hist[1], mm3);
			
 
				-        mmx_coeffs += 2;
			
 
				-        mmx_hist += 2;
			
 
				-        pmaddwd_r2r(mm1, mm0);
			
 
				-        pmaddwd_r2r(mm3, mm2);
			
 
				-        paddd_r2r(mm0, mm4);
			
 
				-        paddd_r2r(mm2, mm4);
			
 
				-        i -= 8;
			
 
				-    }
			
 
				-    movq_r2r(mm4, mm0);
			
 
				-    psrlq_i2r(32, mm0);
			
 
				-    paddd_r2r(mm0, mm4);
			
 
				-    movd_r2m(mm4, y);
			
 
				-    emms();
			
 
				+	int i;
			
 
				+	mmx_t *mmx_coeffs;
			
 
				+	mmx_t *mmx_hist;
			
 
				+
			
 
				+	fir->history[fir->curr_pos] = sample;
			
 
				+	fir->history[fir->curr_pos + fir->taps] = sample;
			
 
				+
			
 
				+	mmx_coeffs = (mmx_t *) fir->coeffs;
			
 
				+	mmx_hist = (mmx_t *) & fir->history[fir->curr_pos];
			
 
				+	i = fir->taps;
			
 
				+	pxor_r2r(mm4, mm4);
			
 
				+	/* 8 samples per iteration, so the filter must be a multiple of 8 long. */
			
 
				+	while (i > 0) {
			
 
				+		movq_m2r(mmx_coeffs[0], mm0);
			
 
				+		movq_m2r(mmx_coeffs[1], mm2);
			
 
				+		movq_m2r(mmx_hist[0], mm1);
			
 
				+		movq_m2r(mmx_hist[1], mm3);
			
 
				+		mmx_coeffs += 2;
			
 
				+		mmx_hist += 2;
			
 
				+		pmaddwd_r2r(mm1, mm0);
			
 
				+		pmaddwd_r2r(mm3, mm2);
			
 
				+		paddd_r2r(mm0, mm4);
			
 
				+		paddd_r2r(mm2, mm4);
			
 
				+		i -= 8;
			
 
				+	}
			
 
				+	movq_r2r(mm4, mm0);
			
 
				+	psrlq_i2r(32, mm0);
			
 
				+	paddd_r2r(mm0, mm4);
			
 
				+	movd_r2m(mm4, y);
			
 
				+	emms();
			
 
				 #elif defined(USE_SSE2)
			
 
				-    int i;
			
 
				-    xmm_t *xmm_coeffs;
			
 
				-    xmm_t *xmm_hist;
			
 
				-
			
 
				-    fir->history[fir->curr_pos] = sample;
			
 
				-    fir->history[fir->curr_pos + fir->taps] = sample;
			
 
				-
			
 
				-    xmm_coeffs = (xmm_t *) fir->coeffs;
			
 
				-    xmm_hist = (xmm_t *) &fir->history[fir->curr_pos];
			
 
				-    i = fir->taps;
			
 
				-    pxor_r2r(xmm4, xmm4);
			
 
				-    /* 16 samples per iteration, so the filter must be a multiple of 16 long. */
			
 
				-    while (i > 0)
			
 
				-    {
			
 
				-        movdqu_m2r(xmm_coeffs[0], xmm0);
			
 
				-        movdqu_m2r(xmm_coeffs[1], xmm2);
			
 
				-        movdqu_m2r(xmm_hist[0], xmm1);
			
 
				-        movdqu_m2r(xmm_hist[1], xmm3);
			
 
				-        xmm_coeffs += 2;
			
 
				-        xmm_hist += 2;
			
 
				-        pmaddwd_r2r(xmm1, xmm0);
			
 
				-        pmaddwd_r2r(xmm3, xmm2);
			
 
				-        paddd_r2r(xmm0, xmm4);
			
 
				-        paddd_r2r(xmm2, xmm4);
			
 
				-        i -= 16;
			
 
				-    }
			
 
				-    movdqa_r2r(xmm4, xmm0);
			
 
				-    psrldq_i2r(8, xmm0);
			
 
				-    paddd_r2r(xmm0, xmm4);
			
 
				-    movdqa_r2r(xmm4, xmm0);
			
 
				-    psrldq_i2r(4, xmm0);
			
 
				-    paddd_r2r(xmm0, xmm4);
			
 
				-    movd_r2m(xmm4, y);
			
 
				+	int i;
			
 
				+	xmm_t *xmm_coeffs;
			
 
				+	xmm_t *xmm_hist;
			
 
				+
			
 
				+	fir->history[fir->curr_pos] = sample;
			
 
				+	fir->history[fir->curr_pos + fir->taps] = sample;
			
 
				+
			
 
				+	xmm_coeffs = (xmm_t *) fir->coeffs;
			
 
				+	xmm_hist = (xmm_t *) & fir->history[fir->curr_pos];
			
 
				+	i = fir->taps;
			
 
				+	pxor_r2r(xmm4, xmm4);
			
 
				+	/* 16 samples per iteration, so the filter must be a multiple of 16 long. */
			
 
				+	while (i > 0) {
			
 
				+		movdqu_m2r(xmm_coeffs[0], xmm0);
			
 
				+		movdqu_m2r(xmm_coeffs[1], xmm2);
			
 
				+		movdqu_m2r(xmm_hist[0], xmm1);
			
 
				+		movdqu_m2r(xmm_hist[1], xmm3);
			
 
				+		xmm_coeffs += 2;
			
 
				+		xmm_hist += 2;
			
 
				+		pmaddwd_r2r(xmm1, xmm0);
			
 
				+		pmaddwd_r2r(xmm3, xmm2);
			
 
				+		paddd_r2r(xmm0, xmm4);
			
 
				+		paddd_r2r(xmm2, xmm4);
			
 
				+		i -= 16;
			
 
				+	}
			
 
				+	movdqa_r2r(xmm4, xmm0);
			
 
				+	psrldq_i2r(8, xmm0);
			
 
				+	paddd_r2r(xmm0, xmm4);
			
 
				+	movdqa_r2r(xmm4, xmm0);
			
 
				+	psrldq_i2r(4, xmm0);
			
 
				+	paddd_r2r(xmm0, xmm4);
			
 
				+	movd_r2m(xmm4, y);
			
 
				 #elif defined(__bfin__)
			
 
				-    fir->history[fir->curr_pos] = sample;
			
 
				-    fir->history[fir->curr_pos + fir->taps] = sample;
			
 
				-    y = dot_asm((int16_t*)fir->coeffs, &fir->history[fir->curr_pos], fir->taps);
			
 
				+	fir->history[fir->curr_pos] = sample;
			
 
				+	fir->history[fir->curr_pos + fir->taps] = sample;
			
 
				+	y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos],
			
 
				+		    fir->taps);
			
 
				 #else
			
 
				-    int i;
			
 
				-    int offset1;
			
 
				-    int offset2;
			
 
				-
			
 
				-    fir->history[fir->curr_pos] = sample;
			
 
				-
			
 
				-    offset2 = fir->curr_pos;
			
 
				-    offset1 = fir->taps - offset2;
			
 
				-    y = 0;
			
 
				-    for (i = fir->taps - 1;  i >= offset1;  i--)
			
 
				-        y += fir->coeffs[i]*fir->history[i - offset1];
			
 
				-    for (  ;  i >= 0;  i--)
			
 
				-        y += fir->coeffs[i]*fir->history[i + offset2];
			
 
				+	int i;
			
 
				+	int offset1;
			
 
				+	int offset2;
			
 
				+
			
 
				+	fir->history[fir->curr_pos] = sample;
			
 
				+
			
 
				+	offset2 = fir->curr_pos;
			
 
				+	offset1 = fir->taps - offset2;
			
 
				+	y = 0;
			
 
				+	for (i = fir->taps - 1; i >= offset1; i--)
			
 
				+		y += fir->coeffs[i] * fir->history[i - offset1];
			
 
				+	for (; i >= 0; i--)
			
 
				+		y += fir->coeffs[i] * fir->history[i + offset2];
			
 
				 #endif
			
 
				-    if (fir->curr_pos <= 0)
			
 
				-    	fir->curr_pos = fir->taps;
			
 
				-    fir->curr_pos--;
			
 
				-    return (int16_t) (y >> 15);
			
 
				+	if (fir->curr_pos <= 0)
			
 
				+		fir->curr_pos = fir->taps;
			
 
				+	fir->curr_pos--;
			
 
				+	return (int16_t) (y >> 15);
			
 
				 }
			
 
				 
			
 
				-static __inline__ const int16_t *fir32_create(fir32_state_t *fir,
			
 
				-                                              const int32_t *coeffs,
			
 
				-                                              int taps)
			
 
				+static __inline__ const int16_t *fir32_create(fir32_state_t * fir,
			
 
				+					      const int32_t * coeffs, int taps)
			
 
				 {
			
 
				-    fir->taps = taps;
			
 
				-    fir->curr_pos = taps - 1;
			
 
				-    fir->coeffs = coeffs;
			
 
				-    fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
			
 
				-    return fir->history;
			
 
				+	fir->taps = taps;
			
 
				+	fir->curr_pos = taps - 1;
			
 
				+	fir->coeffs = coeffs;
			
 
				+	fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
			
 
				+	return fir->history;
			
 
				 }
			
 
				 
			
 
				-static __inline__ void fir32_flush(fir32_state_t *fir)
			
 
				+static __inline__ void fir32_flush(fir32_state_t * fir)
			
 
				 {
			
 
				-    memset(fir->history, 0, fir->taps*sizeof(int16_t));
			
 
				+	memset(fir->history, 0, fir->taps * sizeof(int16_t));
			
 
				 }
			
 
				 
			
 
				-static __inline__ void fir32_free(fir32_state_t *fir)
			
 
				+static __inline__ void fir32_free(fir32_state_t * fir)
			
 
				 {
			
 
				-    kfree(fir->history);
			
 
				+	kfree(fir->history);
			
 
				 }
			
 
				 
			
 
				-static __inline__ int16_t fir32(fir32_state_t *fir, int16_t sample)
			
 
				+static __inline__ int16_t fir32(fir32_state_t * fir, int16_t sample)
			
 
				 {
			
 
				-    int i;
			
 
				-    int32_t y;
			
 
				-    int offset1;
			
 
				-    int offset2;
			
 
				-
			
 
				-    fir->history[fir->curr_pos] = sample;
			
 
				-    offset2 = fir->curr_pos;
			
 
				-    offset1 = fir->taps - offset2;
			
 
				-    y = 0;
			
 
				-    for (i = fir->taps - 1;  i >= offset1;  i--)
			
 
				-        y += fir->coeffs[i]*fir->history[i - offset1];
			
 
				-    for (  ;  i >= 0;  i--)
			
 
				-        y += fir->coeffs[i]*fir->history[i + offset2];
			
 
				-    if (fir->curr_pos <= 0)
			
 
				-    	fir->curr_pos = fir->taps;
			
 
				-    fir->curr_pos--;
			
 
				-    return (int16_t) (y >> 15);
			
 
				+	int i;
			
 
				+	int32_t y;
			
 
				+	int offset1;
			
 
				+	int offset2;
			
 
				+
			
 
				+	fir->history[fir->curr_pos] = sample;
			
 
				+	offset2 = fir->curr_pos;
			
 
				+	offset1 = fir->taps - offset2;
			
 
				+	y = 0;
			
 
				+	for (i = fir->taps - 1; i >= offset1; i--)
			
 
				+		y += fir->coeffs[i] * fir->history[i - offset1];
			
 
				+	for (; i >= 0; i--)
			
 
				+		y += fir->coeffs[i] * fir->history[i + offset2];
			
 
				+	if (fir->curr_pos <= 0)
			
 
				+		fir->curr_pos = fir->taps;
			
 
				+	fir->curr_pos--;
			
 
				+	return (int16_t) (y >> 15);
			
 
				 }
			
 
				 
			
 
				 #endif
			
--- a/drivers/staging/echo/mmx.h
+++ b/drivers/staging/echo/mmx.h
@@ -27,24 +27,23 @@
 
				  * values by ULL, lest they be truncated by the compiler)
			
 
				  */
			
 
				 
			
 
				-typedef        union {
			
 
				-        long long               q;      /* Quadword (64-bit) value */
			
 
				-        unsigned long long      uq;     /* Unsigned Quadword */
			
 
				-        int                     d[2];   /* 2 Doubleword (32-bit) values */
			
 
				-        unsigned int            ud[2];  /* 2 Unsigned Doubleword */
			
 
				-        short                   w[4];   /* 4 Word (16-bit) values */
			
 
				-        unsigned short          uw[4];  /* 4 Unsigned Word */
			
 
				-        char                    b[8];   /* 8 Byte (8-bit) values */
			
 
				-        unsigned char           ub[8];  /* 8 Unsigned Byte */
			
 
				-        float                   s[2];   /* Single-precision (32-bit) value */
			
 
				-} mmx_t;        /* On an 8-byte (64-bit) boundary */
			
 
				+typedef union {
			
 
				+	long long q;		/* Quadword (64-bit) value */
			
 
				+	unsigned long long uq;	/* Unsigned Quadword */
			
 
				+	int d[2];		/* 2 Doubleword (32-bit) values */
			
 
				+	unsigned int ud[2];	/* 2 Unsigned Doubleword */
			
 
				+	short w[4];		/* 4 Word (16-bit) values */
			
 
				+	unsigned short uw[4];	/* 4 Unsigned Word */
			
 
				+	char b[8];		/* 8 Byte (8-bit) values */
			
 
				+	unsigned char ub[8];	/* 8 Unsigned Byte */
			
 
				+	float s[2];		/* Single-precision (32-bit) value */
			
 
				+} mmx_t;			/* On an 8-byte (64-bit) boundary */
			
 
				 
			
 
				 /* SSE registers */
			
 
				 typedef union {
			
 
				 	char b[16];
			
 
				 } xmm_t;
			
 
				 
			
 
				-
			
 
				 #define         mmx_i2r(op,imm,reg) \
			
 
				         __asm__ __volatile__ (#op " %0, %%" #reg \
			
 
				                               : /* nothing */ \
			
@@ -63,7 +62,6 @@ typedef union {
 
				 #define         mmx_r2r(op,regs,regd) \
			
 
				         __asm__ __volatile__ (#op " %" #regs ", %" #regd)
			
 
				 
			
 
				-
			
 
				 #define         emms() __asm__ __volatile__ ("emms")
			
 
				 
			
 
				 #define         movd_m2r(var,reg)           mmx_m2r (movd, var, reg)
			
@@ -192,16 +190,13 @@ typedef union {
 
				 #define         pxor_m2r(var,reg)           mmx_m2r (pxor, var, reg)
			
 
				 #define         pxor_r2r(regs,regd)         mmx_r2r (pxor, regs, regd)
			
 
				 
			
 
				-
			
 
				 /* 3DNOW extensions */
			
 
				 
			
 
				 #define         pavgusb_m2r(var,reg)        mmx_m2r (pavgusb, var, reg)
			
 
				 #define         pavgusb_r2r(regs,regd)      mmx_r2r (pavgusb, regs, regd)
			
 
				 
			
 
				-
			
 
				 /* AMD MMX extensions - also available in intel SSE */
			
 
				 
			
 
				-
			
 
				 #define         mmx_m2ri(op,mem,reg,imm) \
			
 
				         __asm__ __volatile__ (#op " %1, %0, %%" #reg \
			
 
				                               : /* nothing */ \
			
@@ -216,7 +211,6 @@ typedef union {
 
				                               : /* nothing */ \
			
 
				                               : "m" (mem))
			
 
				 
			
 
				-
			
 
				 #define         maskmovq(regs,maskreg)      mmx_r2ri (maskmovq, regs, maskreg)
			
 
				 
			
 
				 #define         movntq_r2m(mmreg,var)       mmx_r2m (movntq, mmreg, var)
			
@@ -284,5 +278,4 @@ typedef union {
 
				 #define         punpcklqdq_r2r(regs,regd)   mmx_r2r (punpcklqdq, regs, regd)
			
 
				 #define         punpckhqdq_r2r(regs,regd)   mmx_r2r (punpckhqdq, regs, regd)
			
 
				 
			
 
				-
			
 
				 #endif /* AVCODEC_I386MMX_H */
			
--- a/drivers/staging/echo/oslec.h
+++ b/drivers/staging/echo/oslec.h
@@ -83,4 +83,4 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx);
 
				 */
			
 
				 int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx);
			
 
				 
			
 
				-#endif	/* __OSLEC_H */
			
 
				+#endif /* __OSLEC_H */