|
@@ -82,9 +82,9 @@
|
|
|
|
|
|
[2] The classic, very useful paper that tells you how to
|
|
|
actually build a real world echo canceller:
|
|
|
- Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice
|
|
|
- Echo Canceller with a TMS320020,
|
|
|
- http://www.rowetel.com/images/echo/spra129.pdf
|
|
|
+ Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice
|
|
|
+ Echo Canceller with a TMS32020",
|
|
|
+ http://www.rowetel.com/images/echo/spra129.pdf
|
|
|
|
|
|
[3] I have written a series of blog posts on this work, here is
|
|
|
Part 1: http://www.rowetel.com/blog/?p=18
|
|
@@ -92,7 +92,7 @@
|
|
|
[4] The source code http://svn.rowetel.com/software/oslec/
|
|
|
|
|
|
[5] A nice reference on LMS filters:
|
|
|
- http://en.wikipedia.org/wiki/Least_mean_squares_filter
|
|
|
+ http://en.wikipedia.org/wiki/Least_mean_squares_filter
|
|
|
|
|
|
Credits:
|
|
|
|
|
@@ -102,21 +102,18 @@
|
|
|
Mark, Pawel, and Pavel.
|
|
|
*/
|
|
|
|
|
|
-#include <linux/kernel.h> /* We're doing kernel work */
|
|
|
+#include <linux/kernel.h>
|
|
|
#include <linux/module.h>
|
|
|
#include <linux/slab.h>
|
|
|
|
|
|
#include "bit_operations.h"
|
|
|
#include "echo.h"
|
|
|
|
|
|
-#define MIN_TX_POWER_FOR_ADAPTION 64
|
|
|
-#define MIN_RX_POWER_FOR_ADAPTION 64
|
|
|
-#define DTD_HANGOVER 600 /* 600 samples, or 75ms */
|
|
|
-#define DC_LOG2BETA 3 /* log2() of DC filter Beta */
|
|
|
+#define MIN_TX_POWER_FOR_ADAPTION 64
|
|
|
+#define MIN_RX_POWER_FOR_ADAPTION 64
|
|
|
+#define DTD_HANGOVER 600 /* 600 samples, or 75ms */
|
|
|
+#define DC_LOG2BETA 3 /* log2() of DC filter Beta */
|
|
|
|
|
|
-/*-----------------------------------------------------------------------*\
|
|
|
- FUNCTIONS
|
|
|
-\*-----------------------------------------------------------------------*/
|
|
|
|
|
|
/* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */
|
|
|
|
|
@@ -328,7 +325,7 @@ void oslec_snapshot(struct oslec_state *ec)
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(oslec_snapshot);
|
|
|
|
|
|
-/* Dual Path Echo Canceller ------------------------------------------------*/
|
|
|
+/* Dual Path Echo Canceller */
|
|
|
|
|
|
int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
|
|
|
{
|
|
@@ -336,9 +333,11 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
|
|
|
int clean_bg;
|
|
|
int tmp, tmp1;
|
|
|
|
|
|
- /* Input scaling was found be required to prevent problems when tx
|
|
|
- starts clipping. Another possible way to handle this would be the
|
|
|
- filter coefficent scaling. */
|
|
|
+ /*
|
|
|
+ * Input scaling was found to be required to prevent problems when tx
|
|
|
+ * starts clipping. Another possible way to handle this would be the
|
|
|
+ * filter coefficient scaling.
|
|
|
+ */
|
|
|
|
|
|
ec->tx = tx;
|
|
|
ec->rx = rx;
|
|
@@ -346,33 +345,40 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
|
|
|
rx >>= 1;
|
|
|
|
|
|
/*
|
|
|
- Filter DC, 3dB point is 160Hz (I think), note 32 bit precision required
|
|
|
- otherwise values do not track down to 0. Zero at DC, Pole at (1-Beta)
|
|
|
- only real axis. Some chip sets (like Si labs) don't need
|
|
|
- this, but something like a $10 X100P card does. Any DC really slows
|
|
|
- down convergence.
|
|
|
-
|
|
|
- Note: removes some low frequency from the signal, this reduces
|
|
|
- the speech quality when listening to samples through headphones
|
|
|
- but may not be obvious through a telephone handset.
|
|
|
-
|
|
|
- Note that the 3dB frequency in radians is approx Beta, e.g. for
|
|
|
- Beta = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
|
|
|
+ * Filter DC, 3dB point is 160Hz (I think), note 32 bit precision
|
|
|
+ * required otherwise values do not track down to 0. Zero at DC, Pole
|
|
|
+ * at (1-Beta) on real axis. Some chip sets (like Si labs) don't
|
|
|
+ * need this, but something like a $10 X100P card does. Any DC really
|
|
|
+ * slows down convergence.
|
|
|
+ *
|
|
|
+ * Note: removes some low frequency from the signal, this reduces the
|
|
|
+ * speech quality when listening to samples through headphones but may
|
|
|
+ * not be obvious through a telephone handset.
|
|
|
+ *
|
|
|
+ * Note that the 3dB frequency in radians is approx Beta, e.g. for Beta
|
|
|
+ * = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
|
|
|
*/
|
|
|
|
|
|
if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
|
|
|
tmp = rx << 15;
|
|
|
#if 1
|
|
|
- /* Make sure the gain of the HPF is 1.0. This can still saturate a little under
|
|
|
- impulse conditions, and it might roll to 32768 and need clipping on sustained peak
|
|
|
- level signals. However, the scale of such clipping is small, and the error due to
|
|
|
- any saturation should not markedly affect the downstream processing. */
|
|
|
+ /*
|
|
|
+ * Make sure the gain of the HPF is 1.0. This can still
|
|
|
+ * saturate a little under impulse conditions, and it might
|
|
|
+ * roll to 32768 and need clipping on sustained peak level
|
|
|
+ * signals. However, the scale of such clipping is small, and
|
|
|
+ * the error due to any saturation should not markedly affect
|
|
|
+ * the downstream processing.
|
|
|
+ */
|
|
|
tmp -= (tmp >> 4);
|
|
|
#endif
|
|
|
ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2;
|
|
|
|
|
|
- /* hard limit filter to prevent clipping. Note that at this stage
|
|
|
- rx should be limited to +/- 16383 due to right shift above */
|
|
|
+ /*
|
|
|
+ * hard limit filter to prevent clipping. Note that at this
|
|
|
+ * stage rx should be limited to +/- 16383 due to right shift
|
|
|
+ * above
|
|
|
+ */
|
|
|
tmp1 = ec->rx_1 >> 15;
|
|
|
if (tmp1 > 16383)
|
|
|
tmp1 = 16383;
|
|
@@ -407,7 +413,7 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
|
|
|
ec->Lrxacc += abs(rx) - ec->Lrx;
|
|
|
ec->Lrx = (ec->Lrxacc + (1 << 4)) >> 5;
|
|
|
|
|
|
- /* Foreground filter --------------------------------------------------- */
|
|
|
+ /* Foreground filter */
|
|
|
|
|
|
ec->fir_state.coeffs = ec->fir_taps16[0];
|
|
|
echo_value = fir16(&ec->fir_state, tx);
|
|
@@ -415,14 +421,14 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
|
|
|
ec->Lcleanacc += abs(ec->clean) - ec->Lclean;
|
|
|
ec->Lclean = (ec->Lcleanacc + (1 << 4)) >> 5;
|
|
|
|
|
|
- /* Background filter --------------------------------------------------- */
|
|
|
+ /* Background filter */
|
|
|
|
|
|
echo_value = fir16(&ec->fir_state_bg, tx);
|
|
|
clean_bg = rx - echo_value;
|
|
|
ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg;
|
|
|
ec->Lclean_bg = (ec->Lclean_bgacc + (1 << 4)) >> 5;
|
|
|
|
|
|
- /* Background Filter adaption ----------------------------------------- */
|
|
|
+ /* Background Filter adaption */
|
|
|
|
|
|
/* Almost always adap bg filter, just simple DT and energy
|
|
|
detection to minimise adaption in cases of strong double talk.
|
|
@@ -483,7 +489,7 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
|
|
|
if (ec->nonupdate_dwell)
|
|
|
ec->nonupdate_dwell--;
|
|
|
|
|
|
- /* Transfer logic ------------------------------------------------------ */
|
|
|
+ /* Transfer logic */
|
|
|
|
|
|
/* These conditions are from the dual path paper [1], I messed with
|
|
|
them a bit to improve performance. */
|
|
@@ -495,7 +501,10 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
|
|
|
/* (ec->Lclean_bg < 0.125*ec->Ltx) */
|
|
|
(8 * ec->Lclean_bg < ec->Ltx)) {
|
|
|
if (ec->cond_met == 6) {
|
|
|
- /* BG filter has had better results for 6 consecutive samples */
|
|
|
+ /*
|
|
|
+ * BG filter has had better results for 6 consecutive
|
|
|
+ * samples
|
|
|
+ */
|
|
|
ec->adapt = 1;
|
|
|
memcpy(ec->fir_taps16[0], ec->fir_taps16[1],
|
|
|
ec->taps * sizeof(int16_t));
|
|
@@ -504,25 +513,34 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
|
|
|
} else
|
|
|
ec->cond_met = 0;
|
|
|
|
|
|
- /* Non-Linear Processing --------------------------------------------------- */
|
|
|
+ /* Non-Linear Processing */
|
|
|
|
|
|
ec->clean_nlp = ec->clean;
|
|
|
if (ec->adaption_mode & ECHO_CAN_USE_NLP) {
|
|
|
- /* Non-linear processor - a fancy way to say "zap small signals, to avoid
|
|
|
- residual echo due to (uLaw/ALaw) non-linearity in the channel.". */
|
|
|
+ /*
|
|
|
+ * Non-linear processor - a fancy way to say "zap small
|
|
|
+ * signals, to avoid residual echo due to (uLaw/ALaw)
|
|
|
+ * non-linearity in the channel.".
|
|
|
+ */
|
|
|
|
|
|
if ((16 * ec->Lclean < ec->Ltx)) {
|
|
|
- /* Our e/c has improved echo by at least 24 dB (each factor of 2 is 6dB,
|
|
|
- so 2*2*2*2=16 is the same as 6+6+6+6=24dB) */
|
|
|
+ /*
|
|
|
+ * Our e/c has improved echo by at least 24 dB (each
|
|
|
+ * factor of 2 is 6dB, so 2*2*2*2=16 is the same as
|
|
|
+ * 6+6+6+6=24dB)
|
|
|
+ */
|
|
|
if (ec->adaption_mode & ECHO_CAN_USE_CNG) {
|
|
|
ec->cng_level = ec->Lbgn;
|
|
|
|
|
|
- /* Very elementary comfort noise generation. Just random
|
|
|
- numbers rolled off very vaguely Hoth-like. DR: This
|
|
|
- noise doesn't sound quite right to me - I suspect there
|
|
|
- are some overlfow issues in the filtering as it's too
|
|
|
- "crackly". TODO: debug this, maybe just play noise at
|
|
|
- high level or look at spectrum.
|
|
|
+ /*
|
|
|
+ * Very elementary comfort noise generation.
|
|
|
+ * Just random numbers rolled off very vaguely
|
|
|
+ * Hoth-like. DR: This noise doesn't sound
|
|
|
+ * quite right to me - I suspect there are some
|
|
|
+ * overflow issues in the filtering as it's too
|
|
|
+ * "crackly".
|
|
|
+ * TODO: debug this, maybe just play noise at
|
|
|
+ * high level or look at spectrum.
|
|
|
*/
|
|
|
|
|
|
ec->cng_rndnum =
|
|
@@ -540,18 +558,22 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
|
|
|
if (ec->clean_nlp < -ec->Lbgn)
|
|
|
ec->clean_nlp = -ec->Lbgn;
|
|
|
} else {
|
|
|
- /* just mute the residual, doesn't sound very good, used mainly
|
|
|
- in G168 tests */
|
|
|
+ /*
|
|
|
+ * just mute the residual, doesn't sound very
|
|
|
+ * good, used mainly in G168 tests
|
|
|
+ */
|
|
|
ec->clean_nlp = 0;
|
|
|
}
|
|
|
} else {
|
|
|
- /* Background noise estimator. I tried a few algorithms
|
|
|
- here without much luck. This very simple one seems to
|
|
|
- work best, we just average the level using a slow (1 sec
|
|
|
- time const) filter if the current level is less than a
|
|
|
- (experimentally derived) constant. This means we dont
|
|
|
- include high level signals like near end speech. When
|
|
|
- combined with CNG or especially CLIP seems to work OK.
|
|
|
+ /*
|
|
|
+ * Background noise estimator. I tried a few
|
|
|
+ * algorithms here without much luck. This very simple
|
|
|
+ * one seems to work best, we just average the level
|
|
|
+ * using a slow (1 sec time const) filter if the
|
|
|
+ * current level is less than a (experimentally
|
|
|
+ * derived) constant. This means we don't include high
|
|
|
+ * level signals like near end speech. When combined
|
|
|
+ * with CNG or especially CLIP seems to work OK.
|
|
|
*/
|
|
|
if (ec->Lclean < 40) {
|
|
|
ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn;
|
|
@@ -587,12 +609,13 @@ EXPORT_SYMBOL_GPL(oslec_update);
|
|
|
It can also help by removing and DC in the tx signal. DC is bad
|
|
|
for LMS algorithms.
|
|
|
|
|
|
- This is one of the classic DC removal filters, adjusted to provide sufficient
|
|
|
- bass rolloff to meet the above requirement to protect hybrids from things that
|
|
|
- upset them. The difference between successive samples produces a lousy HPF, and
|
|
|
- then a suitably placed pole flattens things out. The final result is a nicely
|
|
|
- rolled off bass end. The filtering is implemented with extended fractional
|
|
|
- precision, which noise shapes things, giving very clean DC removal.
|
|
|
+ This is one of the classic DC removal filters, adjusted to provide
|
|
|
+ sufficient bass rolloff to meet the above requirement to protect hybrids
|
|
|
+ from things that upset them. The difference between successive samples
|
|
|
+ produces a lousy HPF, and then a suitably placed pole flattens things out.
|
|
|
+ The final result is a nicely rolled off bass end. The filtering is
|
|
|
+ implemented with extended fractional precision, which noise shapes things,
|
|
|
+ giving very clean DC removal.
|
|
|
*/
|
|
|
|
|
|
int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx)
|
|
@@ -602,10 +625,14 @@ int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx)
|
|
|
if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
|
|
|
tmp = tx << 15;
|
|
|
#if 1
|
|
|
- /* Make sure the gain of the HPF is 1.0. The first can still saturate a little under
|
|
|
- impulse conditions, and it might roll to 32768 and need clipping on sustained peak
|
|
|
- level signals. However, the scale of such clipping is small, and the error due to
|
|
|
- any saturation should not markedly affect the downstream processing. */
|
|
|
+ /*
|
|
|
+ * Make sure the gain of the HPF is 1.0. The first can still
|
|
|
+ * saturate a little under impulse conditions, and it might
|
|
|
+ * roll to 32768 and need clipping on sustained peak level
|
|
|
+ * signals. However, the scale of such clipping is small, and
|
|
|
+ * the error due to any saturation should not markedly affect
|
|
|
+ * the downstream processing.
|
|
|
+ */
|
|
|
tmp -= (tmp >> 4);
|
|
|
#endif
|
|
|
ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2;
|