Staging: Lindent the echo driver
Lindent drivers/staging/echo* Signed-off by: J.R. Mauro <jrm8005@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
This commit is contained in:
parent
786ed801ac
commit
4460a860f7
6 changed files with 609 additions and 627 deletions
|
@ -36,14 +36,15 @@
|
|||
\return The bit number of the highest set bit, or -1 if the word is zero. */
|
||||
static __inline__ int top_bit(unsigned int bits)
|
||||
{
|
||||
int res;
|
||||
int res;
|
||||
|
||||
__asm__ (" xorl %[res],%[res];\n"
|
||||
" decl %[res];\n"
|
||||
" bsrl %[bits],%[res]\n"
|
||||
: [res] "=&r" (res)
|
||||
: [bits] "rm" (bits));
|
||||
return res;
|
||||
__asm__(" xorl %[res],%[res];\n"
|
||||
" decl %[res];\n"
|
||||
" bsrl %[bits],%[res]\n"
|
||||
:[res] "=&r" (res)
|
||||
:[bits] "rm"(bits)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
|
||||
/*! \brief Find the bit position of the lowest set bit in a word
|
||||
|
@ -51,84 +52,75 @@ static __inline__ int top_bit(unsigned int bits)
|
|||
\return The bit number of the lowest set bit, or -1 if the word is zero. */
|
||||
static __inline__ int bottom_bit(unsigned int bits)
|
||||
{
|
||||
int res;
|
||||
int res;
|
||||
|
||||
__asm__ (" xorl %[res],%[res];\n"
|
||||
" decl %[res];\n"
|
||||
" bsfl %[bits],%[res]\n"
|
||||
: [res] "=&r" (res)
|
||||
: [bits] "rm" (bits));
|
||||
return res;
|
||||
__asm__(" xorl %[res],%[res];\n"
|
||||
" decl %[res];\n"
|
||||
" bsfl %[bits],%[res]\n"
|
||||
:[res] "=&r" (res)
|
||||
:[bits] "rm"(bits)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#else
|
||||
static __inline__ int top_bit(unsigned int bits)
|
||||
{
|
||||
int i;
|
||||
int i;
|
||||
|
||||
if (bits == 0)
|
||||
return -1;
|
||||
i = 0;
|
||||
if (bits & 0xFFFF0000)
|
||||
{
|
||||
bits &= 0xFFFF0000;
|
||||
i += 16;
|
||||
}
|
||||
if (bits & 0xFF00FF00)
|
||||
{
|
||||
bits &= 0xFF00FF00;
|
||||
i += 8;
|
||||
}
|
||||
if (bits & 0xF0F0F0F0)
|
||||
{
|
||||
bits &= 0xF0F0F0F0;
|
||||
i += 4;
|
||||
}
|
||||
if (bits & 0xCCCCCCCC)
|
||||
{
|
||||
bits &= 0xCCCCCCCC;
|
||||
i += 2;
|
||||
}
|
||||
if (bits & 0xAAAAAAAA)
|
||||
{
|
||||
bits &= 0xAAAAAAAA;
|
||||
i += 1;
|
||||
}
|
||||
return i;
|
||||
if (bits == 0)
|
||||
return -1;
|
||||
i = 0;
|
||||
if (bits & 0xFFFF0000) {
|
||||
bits &= 0xFFFF0000;
|
||||
i += 16;
|
||||
}
|
||||
if (bits & 0xFF00FF00) {
|
||||
bits &= 0xFF00FF00;
|
||||
i += 8;
|
||||
}
|
||||
if (bits & 0xF0F0F0F0) {
|
||||
bits &= 0xF0F0F0F0;
|
||||
i += 4;
|
||||
}
|
||||
if (bits & 0xCCCCCCCC) {
|
||||
bits &= 0xCCCCCCCC;
|
||||
i += 2;
|
||||
}
|
||||
if (bits & 0xAAAAAAAA) {
|
||||
bits &= 0xAAAAAAAA;
|
||||
i += 1;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
static __inline__ int bottom_bit(unsigned int bits)
|
||||
{
|
||||
int i;
|
||||
int i;
|
||||
|
||||
if (bits == 0)
|
||||
return -1;
|
||||
i = 32;
|
||||
if (bits & 0x0000FFFF)
|
||||
{
|
||||
bits &= 0x0000FFFF;
|
||||
i -= 16;
|
||||
}
|
||||
if (bits & 0x00FF00FF)
|
||||
{
|
||||
bits &= 0x00FF00FF;
|
||||
i -= 8;
|
||||
}
|
||||
if (bits & 0x0F0F0F0F)
|
||||
{
|
||||
bits &= 0x0F0F0F0F;
|
||||
i -= 4;
|
||||
}
|
||||
if (bits & 0x33333333)
|
||||
{
|
||||
bits &= 0x33333333;
|
||||
i -= 2;
|
||||
}
|
||||
if (bits & 0x55555555)
|
||||
{
|
||||
bits &= 0x55555555;
|
||||
i -= 1;
|
||||
}
|
||||
return i;
|
||||
if (bits == 0)
|
||||
return -1;
|
||||
i = 32;
|
||||
if (bits & 0x0000FFFF) {
|
||||
bits &= 0x0000FFFF;
|
||||
i -= 16;
|
||||
}
|
||||
if (bits & 0x00FF00FF) {
|
||||
bits &= 0x00FF00FF;
|
||||
i -= 8;
|
||||
}
|
||||
if (bits & 0x0F0F0F0F) {
|
||||
bits &= 0x0F0F0F0F;
|
||||
i -= 4;
|
||||
}
|
||||
if (bits & 0x33333333) {
|
||||
bits &= 0x33333333;
|
||||
i -= 2;
|
||||
}
|
||||
if (bits & 0x55555555) {
|
||||
bits &= 0x55555555;
|
||||
i -= 1;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -138,13 +130,14 @@ static __inline__ int bottom_bit(unsigned int bits)
|
|||
static __inline__ uint8_t bit_reverse8(uint8_t x)
|
||||
{
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
/* If multiply is fast */
|
||||
return ((x*0x0802U & 0x22110U) | (x*0x8020U & 0x88440U))*0x10101U >> 16;
|
||||
/* If multiply is fast */
|
||||
return ((x * 0x0802U & 0x22110U) | (x * 0x8020U & 0x88440U)) *
|
||||
0x10101U >> 16;
|
||||
#else
|
||||
/* If multiply is slow, but we have a barrel shifter */
|
||||
x = (x >> 4) | (x << 4);
|
||||
x = ((x & 0xCC) >> 2) | ((x & 0x33) << 2);
|
||||
return ((x & 0xAA) >> 1) | ((x & 0x55) << 1);
|
||||
/* If multiply is slow, but we have a barrel shifter */
|
||||
x = (x >> 4) | (x << 4);
|
||||
x = ((x & 0xCC) >> 2) | ((x & 0x33) << 2);
|
||||
return ((x & 0xAA) >> 1) | ((x & 0x55) << 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -184,7 +177,7 @@ uint16_t make_mask16(uint16_t x);
|
|||
\return The word with the single set bit. */
|
||||
static __inline__ uint32_t least_significant_one32(uint32_t x)
|
||||
{
|
||||
return (x & (-(int32_t) x));
|
||||
return (x & (-(int32_t) x));
|
||||
}
|
||||
|
||||
/*! \brief Find the most significant one in a word, and return a word
|
||||
|
@ -194,10 +187,10 @@ static __inline__ uint32_t least_significant_one32(uint32_t x)
|
|||
static __inline__ uint32_t most_significant_one32(uint32_t x)
|
||||
{
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
return 1 << top_bit(x);
|
||||
return 1 << top_bit(x);
|
||||
#else
|
||||
x = make_mask32(x);
|
||||
return (x ^ (x >> 1));
|
||||
x = make_mask32(x);
|
||||
return (x ^ (x >> 1));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -206,8 +199,8 @@ static __inline__ uint32_t most_significant_one32(uint32_t x)
|
|||
\return 1 for odd, or 0 for even. */
|
||||
static __inline__ int parity8(uint8_t x)
|
||||
{
|
||||
x = (x ^ (x >> 4)) & 0x0F;
|
||||
return (0x6996 >> x) & 1;
|
||||
x = (x ^ (x >> 4)) & 0x0F;
|
||||
return (0x6996 >> x) & 1;
|
||||
}
|
||||
|
||||
/*! \brief Find the parity of a 16 bit word.
|
||||
|
@ -215,9 +208,9 @@ static __inline__ int parity8(uint8_t x)
|
|||
\return 1 for odd, or 0 for even. */
|
||||
static __inline__ int parity16(uint16_t x)
|
||||
{
|
||||
x ^= (x >> 8);
|
||||
x = (x ^ (x >> 4)) & 0x0F;
|
||||
return (0x6996 >> x) & 1;
|
||||
x ^= (x >> 8);
|
||||
x = (x ^ (x >> 4)) & 0x0F;
|
||||
return (0x6996 >> x) & 1;
|
||||
}
|
||||
|
||||
/*! \brief Find the parity of a 32 bit word.
|
||||
|
@ -225,10 +218,10 @@ static __inline__ int parity16(uint16_t x)
|
|||
\return 1 for odd, or 0 for even. */
|
||||
static __inline__ int parity32(uint32_t x)
|
||||
{
|
||||
x ^= (x >> 16);
|
||||
x ^= (x >> 8);
|
||||
x = (x ^ (x >> 4)) & 0x0F;
|
||||
return (0x6996 >> x) & 1;
|
||||
x ^= (x >> 16);
|
||||
x ^= (x >> 8);
|
||||
x = (x ^ (x >> 4)) & 0x0F;
|
||||
return (0x6996 >> x) & 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -74,7 +74,6 @@
|
|||
|
||||
Steve also has some nice notes on echo cancellers in echo.h
|
||||
|
||||
|
||||
References:
|
||||
|
||||
[1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo
|
||||
|
@ -105,7 +104,7 @@
|
|||
Mark, Pawel, and Pavel.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h> /* We're doing kernel work */
|
||||
#include <linux/kernel.h> /* We're doing kernel work */
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/slab.h>
|
||||
|
@ -115,8 +114,8 @@
|
|||
|
||||
#define MIN_TX_POWER_FOR_ADAPTION 64
|
||||
#define MIN_RX_POWER_FOR_ADAPTION 64
|
||||
#define DTD_HANGOVER 600 /* 600 samples, or 75ms */
|
||||
#define DC_LOG2BETA 3 /* log2() of DC filter Beta */
|
||||
#define DTD_HANGOVER 600 /* 600 samples, or 75ms */
|
||||
#define DC_LOG2BETA 3 /* log2() of DC filter Beta */
|
||||
|
||||
/*-----------------------------------------------------------------------*\
|
||||
FUNCTIONS
|
||||
|
@ -124,59 +123,58 @@
|
|||
|
||||
/* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */
|
||||
|
||||
|
||||
#ifdef __bfin__
|
||||
static void __inline__ lms_adapt_bg(struct oslec_state *ec, int clean, int shift)
|
||||
static void __inline__ lms_adapt_bg(struct oslec_state *ec, int clean,
|
||||
int shift)
|
||||
{
|
||||
int i, j;
|
||||
int offset1;
|
||||
int offset2;
|
||||
int factor;
|
||||
int exp;
|
||||
int16_t *phist;
|
||||
int n;
|
||||
int i, j;
|
||||
int offset1;
|
||||
int offset2;
|
||||
int factor;
|
||||
int exp;
|
||||
int16_t *phist;
|
||||
int n;
|
||||
|
||||
if (shift > 0)
|
||||
factor = clean << shift;
|
||||
else
|
||||
factor = clean >> -shift;
|
||||
if (shift > 0)
|
||||
factor = clean << shift;
|
||||
else
|
||||
factor = clean >> -shift;
|
||||
|
||||
/* Update the FIR taps */
|
||||
/* Update the FIR taps */
|
||||
|
||||
offset2 = ec->curr_pos;
|
||||
offset1 = ec->taps - offset2;
|
||||
phist = &ec->fir_state_bg.history[offset2];
|
||||
offset2 = ec->curr_pos;
|
||||
offset1 = ec->taps - offset2;
|
||||
phist = &ec->fir_state_bg.history[offset2];
|
||||
|
||||
/* st: and en: help us locate the assembler in echo.s */
|
||||
/* st: and en: help us locate the assembler in echo.s */
|
||||
|
||||
//asm("st:");
|
||||
n = ec->taps;
|
||||
for (i = 0, j = offset2; i < n; i++, j++)
|
||||
{
|
||||
exp = *phist++ * factor;
|
||||
ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
|
||||
}
|
||||
//asm("en:");
|
||||
//asm("st:");
|
||||
n = ec->taps;
|
||||
for (i = 0, j = offset2; i < n; i++, j++) {
|
||||
exp = *phist++ * factor;
|
||||
ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
|
||||
}
|
||||
//asm("en:");
|
||||
|
||||
/* Note the asm for the inner loop above generated by Blackfin gcc
|
||||
4.1.1 is pretty good (note even parallel instructions used):
|
||||
/* Note the asm for the inner loop above generated by Blackfin gcc
|
||||
4.1.1 is pretty good (note even parallel instructions used):
|
||||
|
||||
R0 = W [P0++] (X);
|
||||
R0 *= R2;
|
||||
R0 = R0 + R3 (NS) ||
|
||||
R1 = W [P1] (X) ||
|
||||
nop;
|
||||
R0 >>>= 15;
|
||||
R0 = R0 + R1;
|
||||
W [P1++] = R0;
|
||||
R0 = W [P0++] (X);
|
||||
R0 *= R2;
|
||||
R0 = R0 + R3 (NS) ||
|
||||
R1 = W [P1] (X) ||
|
||||
nop;
|
||||
R0 >>>= 15;
|
||||
R0 = R0 + R1;
|
||||
W [P1++] = R0;
|
||||
|
||||
A block based update algorithm would be much faster but the
|
||||
above can't be improved on much. Every instruction saved in
|
||||
the loop above is 2 MIPs/ch! The for loop above is where the
|
||||
Blackfin spends most of it's time - about 17 MIPs/ch measured
|
||||
with speedtest.c with 256 taps (32ms). Write-back and
|
||||
Write-through cache gave about the same performance.
|
||||
*/
|
||||
A block based update algorithm would be much faster but the
|
||||
above can't be improved on much. Every instruction saved in
|
||||
the loop above is 2 MIPs/ch! The for loop above is where the
|
||||
Blackfin spends most of it's time - about 17 MIPs/ch measured
|
||||
with speedtest.c with 256 taps (32ms). Write-back and
|
||||
Write-through cache gave about the same performance.
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -198,94 +196,90 @@ static void __inline__ lms_adapt_bg(struct oslec_state *ec, int clean, int shift
|
|||
*/
|
||||
|
||||
#else
|
||||
static __inline__ void lms_adapt_bg(struct oslec_state *ec, int clean, int shift)
|
||||
static __inline__ void lms_adapt_bg(struct oslec_state *ec, int clean,
|
||||
int shift)
|
||||
{
|
||||
int i;
|
||||
int i;
|
||||
|
||||
int offset1;
|
||||
int offset2;
|
||||
int factor;
|
||||
int exp;
|
||||
int offset1;
|
||||
int offset2;
|
||||
int factor;
|
||||
int exp;
|
||||
|
||||
if (shift > 0)
|
||||
factor = clean << shift;
|
||||
else
|
||||
factor = clean >> -shift;
|
||||
if (shift > 0)
|
||||
factor = clean << shift;
|
||||
else
|
||||
factor = clean >> -shift;
|
||||
|
||||
/* Update the FIR taps */
|
||||
/* Update the FIR taps */
|
||||
|
||||
offset2 = ec->curr_pos;
|
||||
offset1 = ec->taps - offset2;
|
||||
offset2 = ec->curr_pos;
|
||||
offset1 = ec->taps - offset2;
|
||||
|
||||
for (i = ec->taps - 1; i >= offset1; i--)
|
||||
{
|
||||
exp = (ec->fir_state_bg.history[i - offset1]*factor);
|
||||
ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
|
||||
}
|
||||
for ( ; i >= 0; i--)
|
||||
{
|
||||
exp = (ec->fir_state_bg.history[i + offset2]*factor);
|
||||
ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
|
||||
}
|
||||
for (i = ec->taps - 1; i >= offset1; i--) {
|
||||
exp = (ec->fir_state_bg.history[i - offset1] * factor);
|
||||
ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
|
||||
}
|
||||
for (; i >= 0; i--) {
|
||||
exp = (ec->fir_state_bg.history[i + offset2] * factor);
|
||||
ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
struct oslec_state *oslec_create(int len, int adaption_mode)
|
||||
{
|
||||
struct oslec_state *ec;
|
||||
int i;
|
||||
struct oslec_state *ec;
|
||||
int i;
|
||||
|
||||
ec = kzalloc(sizeof(*ec), GFP_KERNEL);
|
||||
if (!ec)
|
||||
return NULL;
|
||||
ec = kzalloc(sizeof(*ec), GFP_KERNEL);
|
||||
if (!ec)
|
||||
return NULL;
|
||||
|
||||
ec->taps = len;
|
||||
ec->log2taps = top_bit(len);
|
||||
ec->curr_pos = ec->taps - 1;
|
||||
ec->taps = len;
|
||||
ec->log2taps = top_bit(len);
|
||||
ec->curr_pos = ec->taps - 1;
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
ec->fir_taps16[i] = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
|
||||
if (!ec->fir_taps16[i])
|
||||
goto error_oom;
|
||||
}
|
||||
for (i = 0; i < 2; i++) {
|
||||
ec->fir_taps16[i] =
|
||||
kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
|
||||
if (!ec->fir_taps16[i])
|
||||
goto error_oom;
|
||||
}
|
||||
|
||||
fir16_create(&ec->fir_state,
|
||||
ec->fir_taps16[0],
|
||||
ec->taps);
|
||||
fir16_create(&ec->fir_state_bg,
|
||||
ec->fir_taps16[1],
|
||||
ec->taps);
|
||||
fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps);
|
||||
fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps);
|
||||
|
||||
for(i=0; i<5; i++) {
|
||||
ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0;
|
||||
}
|
||||
for (i = 0; i < 5; i++) {
|
||||
ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0;
|
||||
}
|
||||
|
||||
ec->cng_level = 1000;
|
||||
oslec_adaption_mode(ec, adaption_mode);
|
||||
ec->cng_level = 1000;
|
||||
oslec_adaption_mode(ec, adaption_mode);
|
||||
|
||||
ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
|
||||
if (!ec->snapshot)
|
||||
goto error_oom;
|
||||
ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
|
||||
if (!ec->snapshot)
|
||||
goto error_oom;
|
||||
|
||||
ec->cond_met = 0;
|
||||
ec->Pstates = 0;
|
||||
ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
|
||||
ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
|
||||
ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
|
||||
ec->Lbgn = ec->Lbgn_acc = 0;
|
||||
ec->Lbgn_upper = 200;
|
||||
ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
|
||||
ec->cond_met = 0;
|
||||
ec->Pstates = 0;
|
||||
ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
|
||||
ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
|
||||
ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
|
||||
ec->Lbgn = ec->Lbgn_acc = 0;
|
||||
ec->Lbgn_upper = 200;
|
||||
ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
|
||||
|
||||
return ec;
|
||||
return ec;
|
||||
|
||||
error_oom:
|
||||
for (i = 0; i < 2; i++)
|
||||
kfree(ec->fir_taps16[i]);
|
||||
error_oom:
|
||||
for (i = 0; i < 2; i++)
|
||||
kfree(ec->fir_taps16[i]);
|
||||
|
||||
kfree(ec);
|
||||
return NULL;
|
||||
kfree(ec);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(oslec_create);
|
||||
|
||||
void oslec_free(struct oslec_state *ec)
|
||||
|
@ -294,293 +288,300 @@ void oslec_free(struct oslec_state *ec)
|
|||
|
||||
fir16_free(&ec->fir_state);
|
||||
fir16_free(&ec->fir_state_bg);
|
||||
for (i = 0; i < 2; i++)
|
||||
for (i = 0; i < 2; i++)
|
||||
kfree(ec->fir_taps16[i]);
|
||||
kfree(ec->snapshot);
|
||||
kfree(ec);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(oslec_free);
|
||||
|
||||
void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode)
|
||||
{
|
||||
ec->adaption_mode = adaption_mode;
|
||||
ec->adaption_mode = adaption_mode;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(oslec_adaption_mode);
|
||||
|
||||
void oslec_flush(struct oslec_state *ec)
|
||||
{
|
||||
int i;
|
||||
int i;
|
||||
|
||||
ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
|
||||
ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
|
||||
ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
|
||||
ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
|
||||
ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
|
||||
ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
|
||||
|
||||
ec->Lbgn = ec->Lbgn_acc = 0;
|
||||
ec->Lbgn_upper = 200;
|
||||
ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
|
||||
ec->Lbgn = ec->Lbgn_acc = 0;
|
||||
ec->Lbgn_upper = 200;
|
||||
ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
|
||||
|
||||
ec->nonupdate_dwell = 0;
|
||||
ec->nonupdate_dwell = 0;
|
||||
|
||||
fir16_flush(&ec->fir_state);
|
||||
fir16_flush(&ec->fir_state_bg);
|
||||
ec->fir_state.curr_pos = ec->taps - 1;
|
||||
ec->fir_state_bg.curr_pos = ec->taps - 1;
|
||||
for (i = 0; i < 2; i++)
|
||||
memset(ec->fir_taps16[i], 0, ec->taps*sizeof(int16_t));
|
||||
fir16_flush(&ec->fir_state);
|
||||
fir16_flush(&ec->fir_state_bg);
|
||||
ec->fir_state.curr_pos = ec->taps - 1;
|
||||
ec->fir_state_bg.curr_pos = ec->taps - 1;
|
||||
for (i = 0; i < 2; i++)
|
||||
memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t));
|
||||
|
||||
ec->curr_pos = ec->taps - 1;
|
||||
ec->Pstates = 0;
|
||||
ec->curr_pos = ec->taps - 1;
|
||||
ec->Pstates = 0;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(oslec_flush);
|
||||
|
||||
void oslec_snapshot(struct oslec_state *ec) {
|
||||
memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps*sizeof(int16_t));
|
||||
void oslec_snapshot(struct oslec_state *ec)
|
||||
{
|
||||
memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t));
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(oslec_snapshot);
|
||||
|
||||
/* Dual Path Echo Canceller ------------------------------------------------*/
|
||||
|
||||
int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
|
||||
{
|
||||
int32_t echo_value;
|
||||
int clean_bg;
|
||||
int tmp, tmp1;
|
||||
int32_t echo_value;
|
||||
int clean_bg;
|
||||
int tmp, tmp1;
|
||||
|
||||
/* Input scaling was found be required to prevent problems when tx
|
||||
starts clipping. Another possible way to handle this would be the
|
||||
filter coefficent scaling. */
|
||||
/* Input scaling was found be required to prevent problems when tx
|
||||
starts clipping. Another possible way to handle this would be the
|
||||
filter coefficent scaling. */
|
||||
|
||||
ec->tx = tx; ec->rx = rx;
|
||||
tx >>=1;
|
||||
rx >>=1;
|
||||
ec->tx = tx;
|
||||
ec->rx = rx;
|
||||
tx >>= 1;
|
||||
rx >>= 1;
|
||||
|
||||
/*
|
||||
Filter DC, 3dB point is 160Hz (I think), note 32 bit precision required
|
||||
otherwise values do not track down to 0. Zero at DC, Pole at (1-Beta)
|
||||
only real axis. Some chip sets (like Si labs) don't need
|
||||
this, but something like a $10 X100P card does. Any DC really slows
|
||||
down convergence.
|
||||
/*
|
||||
Filter DC, 3dB point is 160Hz (I think), note 32 bit precision required
|
||||
otherwise values do not track down to 0. Zero at DC, Pole at (1-Beta)
|
||||
only real axis. Some chip sets (like Si labs) don't need
|
||||
this, but something like a $10 X100P card does. Any DC really slows
|
||||
down convergence.
|
||||
|
||||
Note: removes some low frequency from the signal, this reduces
|
||||
the speech quality when listening to samples through headphones
|
||||
but may not be obvious through a telephone handset.
|
||||
Note: removes some low frequency from the signal, this reduces
|
||||
the speech quality when listening to samples through headphones
|
||||
but may not be obvious through a telephone handset.
|
||||
|
||||
Note that the 3dB frequency in radians is approx Beta, e.g. for
|
||||
Beta = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
|
||||
*/
|
||||
Note that the 3dB frequency in radians is approx Beta, e.g. for
|
||||
Beta = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
|
||||
*/
|
||||
|
||||
if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
|
||||
tmp = rx << 15;
|
||||
if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
|
||||
tmp = rx << 15;
|
||||
#if 1
|
||||
/* Make sure the gain of the HPF is 1.0. This can still saturate a little under
|
||||
impulse conditions, and it might roll to 32768 and need clipping on sustained peak
|
||||
level signals. However, the scale of such clipping is small, and the error due to
|
||||
any saturation should not markedly affect the downstream processing. */
|
||||
tmp -= (tmp >> 4);
|
||||
/* Make sure the gain of the HPF is 1.0. This can still saturate a little under
|
||||
impulse conditions, and it might roll to 32768 and need clipping on sustained peak
|
||||
level signals. However, the scale of such clipping is small, and the error due to
|
||||
any saturation should not markedly affect the downstream processing. */
|
||||
tmp -= (tmp >> 4);
|
||||
#endif
|
||||
ec->rx_1 += -(ec->rx_1>>DC_LOG2BETA) + tmp - ec->rx_2;
|
||||
ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2;
|
||||
|
||||
/* hard limit filter to prevent clipping. Note that at this stage
|
||||
rx should be limited to +/- 16383 due to right shift above */
|
||||
tmp1 = ec->rx_1 >> 15;
|
||||
if (tmp1 > 16383) tmp1 = 16383;
|
||||
if (tmp1 < -16383) tmp1 = -16383;
|
||||
rx = tmp1;
|
||||
ec->rx_2 = tmp;
|
||||
}
|
||||
|
||||
/* Block average of power in the filter states. Used for
|
||||
adaption power calculation. */
|
||||
|
||||
{
|
||||
int new, old;
|
||||
|
||||
/* efficient "out with the old and in with the new" algorithm so
|
||||
we don't have to recalculate over the whole block of
|
||||
samples. */
|
||||
new = (int)tx * (int)tx;
|
||||
old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
|
||||
(int)ec->fir_state.history[ec->fir_state.curr_pos];
|
||||
ec->Pstates += ((new - old) + (1<<ec->log2taps)) >> ec->log2taps;
|
||||
if (ec->Pstates < 0) ec->Pstates = 0;
|
||||
}
|
||||
|
||||
/* Calculate short term average levels using simple single pole IIRs */
|
||||
|
||||
ec->Ltxacc += abs(tx) - ec->Ltx;
|
||||
ec->Ltx = (ec->Ltxacc + (1<<4)) >> 5;
|
||||
ec->Lrxacc += abs(rx) - ec->Lrx;
|
||||
ec->Lrx = (ec->Lrxacc + (1<<4)) >> 5;
|
||||
|
||||
/* Foreground filter ---------------------------------------------------*/
|
||||
|
||||
ec->fir_state.coeffs = ec->fir_taps16[0];
|
||||
echo_value = fir16(&ec->fir_state, tx);
|
||||
ec->clean = rx - echo_value;
|
||||
ec->Lcleanacc += abs(ec->clean) - ec->Lclean;
|
||||
ec->Lclean = (ec->Lcleanacc + (1<<4)) >> 5;
|
||||
|
||||
/* Background filter ---------------------------------------------------*/
|
||||
|
||||
echo_value = fir16(&ec->fir_state_bg, tx);
|
||||
clean_bg = rx - echo_value;
|
||||
ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg;
|
||||
ec->Lclean_bg = (ec->Lclean_bgacc + (1<<4)) >> 5;
|
||||
|
||||
/* Background Filter adaption -----------------------------------------*/
|
||||
|
||||
/* Almost always adap bg filter, just simple DT and energy
|
||||
detection to minimise adaption in cases of strong double talk.
|
||||
However this is not critical for the dual path algorithm.
|
||||
*/
|
||||
ec->factor = 0;
|
||||
ec->shift = 0;
|
||||
if ((ec->nonupdate_dwell == 0)) {
|
||||
int P, logP, shift;
|
||||
|
||||
/* Determine:
|
||||
|
||||
f = Beta * clean_bg_rx/P ------ (1)
|
||||
|
||||
where P is the total power in the filter states.
|
||||
|
||||
The Boffins have shown that if we obey (1) we converge
|
||||
quickly and avoid instability.
|
||||
|
||||
The correct factor f must be in Q30, as this is the fixed
|
||||
point format required by the lms_adapt_bg() function,
|
||||
therefore the scaled version of (1) is:
|
||||
|
||||
(2^30) * f = (2^30) * Beta * clean_bg_rx/P
|
||||
factor = (2^30) * Beta * clean_bg_rx/P ----- (2)
|
||||
|
||||
We have chosen Beta = 0.25 by experiment, so:
|
||||
|
||||
factor = (2^30) * (2^-2) * clean_bg_rx/P
|
||||
|
||||
(30 - 2 - log2(P))
|
||||
factor = clean_bg_rx 2 ----- (3)
|
||||
|
||||
To avoid a divide we approximate log2(P) as top_bit(P),
|
||||
which returns the position of the highest non-zero bit in
|
||||
P. This approximation introduces an error as large as a
|
||||
factor of 2, but the algorithm seems to handle it OK.
|
||||
|
||||
Come to think of it a divide may not be a big deal on a
|
||||
modern DSP, so its probably worth checking out the cycles
|
||||
for a divide versus a top_bit() implementation.
|
||||
*/
|
||||
|
||||
P = MIN_TX_POWER_FOR_ADAPTION + ec->Pstates;
|
||||
logP = top_bit(P) + ec->log2taps;
|
||||
shift = 30 - 2 - logP;
|
||||
ec->shift = shift;
|
||||
|
||||
lms_adapt_bg(ec, clean_bg, shift);
|
||||
}
|
||||
|
||||
/* very simple DTD to make sure we dont try and adapt with strong
|
||||
near end speech */
|
||||
|
||||
ec->adapt = 0;
|
||||
if ((ec->Lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->Lrx > ec->Ltx))
|
||||
ec->nonupdate_dwell = DTD_HANGOVER;
|
||||
if (ec->nonupdate_dwell)
|
||||
ec->nonupdate_dwell--;
|
||||
|
||||
/* Transfer logic ------------------------------------------------------*/
|
||||
|
||||
/* These conditions are from the dual path paper [1], I messed with
|
||||
them a bit to improve performance. */
|
||||
|
||||
if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
|
||||
(ec->nonupdate_dwell == 0) &&
|
||||
(8*ec->Lclean_bg < 7*ec->Lclean) /* (ec->Lclean_bg < 0.875*ec->Lclean) */ &&
|
||||
(8*ec->Lclean_bg < ec->Ltx) /* (ec->Lclean_bg < 0.125*ec->Ltx) */ )
|
||||
{
|
||||
if (ec->cond_met == 6) {
|
||||
/* BG filter has had better results for 6 consecutive samples */
|
||||
ec->adapt = 1;
|
||||
memcpy(ec->fir_taps16[0], ec->fir_taps16[1], ec->taps*sizeof(int16_t));
|
||||
/* hard limit filter to prevent clipping. Note that at this stage
|
||||
rx should be limited to +/- 16383 due to right shift above */
|
||||
tmp1 = ec->rx_1 >> 15;
|
||||
if (tmp1 > 16383)
|
||||
tmp1 = 16383;
|
||||
if (tmp1 < -16383)
|
||||
tmp1 = -16383;
|
||||
rx = tmp1;
|
||||
ec->rx_2 = tmp;
|
||||
}
|
||||
else
|
||||
ec->cond_met++;
|
||||
}
|
||||
else
|
||||
ec->cond_met = 0;
|
||||
|
||||
/* Non-Linear Processing ---------------------------------------------------*/
|
||||
/* Block average of power in the filter states. Used for
|
||||
adaption power calculation. */
|
||||
|
||||
ec->clean_nlp = ec->clean;
|
||||
if (ec->adaption_mode & ECHO_CAN_USE_NLP)
|
||||
{
|
||||
/* Non-linear processor - a fancy way to say "zap small signals, to avoid
|
||||
residual echo due to (uLaw/ALaw) non-linearity in the channel.". */
|
||||
|
||||
if ((16*ec->Lclean < ec->Ltx))
|
||||
{
|
||||
/* Our e/c has improved echo by at least 24 dB (each factor of 2 is 6dB,
|
||||
so 2*2*2*2=16 is the same as 6+6+6+6=24dB) */
|
||||
if (ec->adaption_mode & ECHO_CAN_USE_CNG)
|
||||
{
|
||||
ec->cng_level = ec->Lbgn;
|
||||
int new, old;
|
||||
|
||||
/* Very elementary comfort noise generation. Just random
|
||||
numbers rolled off very vaguely Hoth-like. DR: This
|
||||
noise doesn't sound quite right to me - I suspect there
|
||||
are some overlfow issues in the filtering as it's too
|
||||
"crackly". TODO: debug this, maybe just play noise at
|
||||
high level or look at spectrum.
|
||||
*/
|
||||
|
||||
ec->cng_rndnum = 1664525U*ec->cng_rndnum + 1013904223U;
|
||||
ec->cng_filter = ((ec->cng_rndnum & 0xFFFF) - 32768 + 5*ec->cng_filter) >> 3;
|
||||
ec->clean_nlp = (ec->cng_filter*ec->cng_level*8) >> 14;
|
||||
|
||||
}
|
||||
else if (ec->adaption_mode & ECHO_CAN_USE_CLIP)
|
||||
{
|
||||
/* This sounds much better than CNG */
|
||||
if (ec->clean_nlp > ec->Lbgn)
|
||||
ec->clean_nlp = ec->Lbgn;
|
||||
if (ec->clean_nlp < -ec->Lbgn)
|
||||
ec->clean_nlp = -ec->Lbgn;
|
||||
/* efficient "out with the old and in with the new" algorithm so
|
||||
we don't have to recalculate over the whole block of
|
||||
samples. */
|
||||
new = (int)tx *(int)tx;
|
||||
old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
|
||||
(int)ec->fir_state.history[ec->fir_state.curr_pos];
|
||||
ec->Pstates +=
|
||||
((new - old) + (1 << ec->log2taps)) >> ec->log2taps;
|
||||
if (ec->Pstates < 0)
|
||||
ec->Pstates = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* just mute the residual, doesn't sound very good, used mainly
|
||||
in G168 tests */
|
||||
ec->clean_nlp = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Background noise estimator. I tried a few algorithms
|
||||
here without much luck. This very simple one seems to
|
||||
work best, we just average the level using a slow (1 sec
|
||||
time const) filter if the current level is less than a
|
||||
(experimentally derived) constant. This means we dont
|
||||
include high level signals like near end speech. When
|
||||
combined with CNG or especially CLIP seems to work OK.
|
||||
*/
|
||||
if (ec->Lclean < 40) {
|
||||
ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn;
|
||||
ec->Lbgn = (ec->Lbgn_acc + (1<<11)) >> 12;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Roll around the taps buffer */
|
||||
if (ec->curr_pos <= 0)
|
||||
ec->curr_pos = ec->taps;
|
||||
ec->curr_pos--;
|
||||
/* Calculate short term average levels using simple single pole IIRs */
|
||||
|
||||
if (ec->adaption_mode & ECHO_CAN_DISABLE)
|
||||
ec->clean_nlp = rx;
|
||||
ec->Ltxacc += abs(tx) - ec->Ltx;
|
||||
ec->Ltx = (ec->Ltxacc + (1 << 4)) >> 5;
|
||||
ec->Lrxacc += abs(rx) - ec->Lrx;
|
||||
ec->Lrx = (ec->Lrxacc + (1 << 4)) >> 5;
|
||||
|
||||
/* Output scaled back up again to match input scaling */
|
||||
/* Foreground filter --------------------------------------------------- */
|
||||
|
||||
return (int16_t) ec->clean_nlp << 1;
|
||||
ec->fir_state.coeffs = ec->fir_taps16[0];
|
||||
echo_value = fir16(&ec->fir_state, tx);
|
||||
ec->clean = rx - echo_value;
|
||||
ec->Lcleanacc += abs(ec->clean) - ec->Lclean;
|
||||
ec->Lclean = (ec->Lcleanacc + (1 << 4)) >> 5;
|
||||
|
||||
/* Background filter --------------------------------------------------- */
|
||||
|
||||
echo_value = fir16(&ec->fir_state_bg, tx);
|
||||
clean_bg = rx - echo_value;
|
||||
ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg;
|
||||
ec->Lclean_bg = (ec->Lclean_bgacc + (1 << 4)) >> 5;
|
||||
|
||||
/* Background Filter adaption ----------------------------------------- */
|
||||
|
||||
/* Almost always adap bg filter, just simple DT and energy
|
||||
detection to minimise adaption in cases of strong double talk.
|
||||
However this is not critical for the dual path algorithm.
|
||||
*/
|
||||
ec->factor = 0;
|
||||
ec->shift = 0;
|
||||
if ((ec->nonupdate_dwell == 0)) {
|
||||
int P, logP, shift;
|
||||
|
||||
/* Determine:
|
||||
|
||||
f = Beta * clean_bg_rx/P ------ (1)
|
||||
|
||||
where P is the total power in the filter states.
|
||||
|
||||
The Boffins have shown that if we obey (1) we converge
|
||||
quickly and avoid instability.
|
||||
|
||||
The correct factor f must be in Q30, as this is the fixed
|
||||
point format required by the lms_adapt_bg() function,
|
||||
therefore the scaled version of (1) is:
|
||||
|
||||
(2^30) * f = (2^30) * Beta * clean_bg_rx/P
|
||||
factor = (2^30) * Beta * clean_bg_rx/P ----- (2)
|
||||
|
||||
We have chosen Beta = 0.25 by experiment, so:
|
||||
|
||||
factor = (2^30) * (2^-2) * clean_bg_rx/P
|
||||
|
||||
(30 - 2 - log2(P))
|
||||
factor = clean_bg_rx 2 ----- (3)
|
||||
|
||||
To avoid a divide we approximate log2(P) as top_bit(P),
|
||||
which returns the position of the highest non-zero bit in
|
||||
P. This approximation introduces an error as large as a
|
||||
factor of 2, but the algorithm seems to handle it OK.
|
||||
|
||||
Come to think of it a divide may not be a big deal on a
|
||||
modern DSP, so its probably worth checking out the cycles
|
||||
for a divide versus a top_bit() implementation.
|
||||
*/
|
||||
|
||||
P = MIN_TX_POWER_FOR_ADAPTION + ec->Pstates;
|
||||
logP = top_bit(P) + ec->log2taps;
|
||||
shift = 30 - 2 - logP;
|
||||
ec->shift = shift;
|
||||
|
||||
lms_adapt_bg(ec, clean_bg, shift);
|
||||
}
|
||||
|
||||
/* very simple DTD to make sure we dont try and adapt with strong
|
||||
near end speech */
|
||||
|
||||
ec->adapt = 0;
|
||||
if ((ec->Lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->Lrx > ec->Ltx))
|
||||
ec->nonupdate_dwell = DTD_HANGOVER;
|
||||
if (ec->nonupdate_dwell)
|
||||
ec->nonupdate_dwell--;
|
||||
|
||||
/* Transfer logic ------------------------------------------------------ */
|
||||
|
||||
/* These conditions are from the dual path paper [1], I messed with
|
||||
them a bit to improve performance. */
|
||||
|
||||
if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
|
||||
(ec->nonupdate_dwell == 0) &&
|
||||
(8 * ec->Lclean_bg <
|
||||
7 * ec->Lclean) /* (ec->Lclean_bg < 0.875*ec->Lclean) */ &&
|
||||
(8 * ec->Lclean_bg <
|
||||
ec->Ltx) /* (ec->Lclean_bg < 0.125*ec->Ltx) */ ) {
|
||||
if (ec->cond_met == 6) {
|
||||
/* BG filter has had better results for 6 consecutive samples */
|
||||
ec->adapt = 1;
|
||||
memcpy(ec->fir_taps16[0], ec->fir_taps16[1],
|
||||
ec->taps * sizeof(int16_t));
|
||||
} else
|
||||
ec->cond_met++;
|
||||
} else
|
||||
ec->cond_met = 0;
|
||||
|
||||
/* Non-Linear Processing --------------------------------------------------- */
|
||||
|
||||
ec->clean_nlp = ec->clean;
|
||||
if (ec->adaption_mode & ECHO_CAN_USE_NLP) {
|
||||
/* Non-linear processor - a fancy way to say "zap small signals, to avoid
|
||||
residual echo due to (uLaw/ALaw) non-linearity in the channel.". */
|
||||
|
||||
if ((16 * ec->Lclean < ec->Ltx)) {
|
||||
/* Our e/c has improved echo by at least 24 dB (each factor of 2 is 6dB,
|
||||
so 2*2*2*2=16 is the same as 6+6+6+6=24dB) */
|
||||
if (ec->adaption_mode & ECHO_CAN_USE_CNG) {
|
||||
ec->cng_level = ec->Lbgn;
|
||||
|
||||
/* Very elementary comfort noise generation. Just random
|
||||
numbers rolled off very vaguely Hoth-like. DR: This
|
||||
noise doesn't sound quite right to me - I suspect there
|
||||
are some overlfow issues in the filtering as it's too
|
||||
"crackly". TODO: debug this, maybe just play noise at
|
||||
high level or look at spectrum.
|
||||
*/
|
||||
|
||||
ec->cng_rndnum =
|
||||
1664525U * ec->cng_rndnum + 1013904223U;
|
||||
ec->cng_filter =
|
||||
((ec->cng_rndnum & 0xFFFF) - 32768 +
|
||||
5 * ec->cng_filter) >> 3;
|
||||
ec->clean_nlp =
|
||||
(ec->cng_filter * ec->cng_level * 8) >> 14;
|
||||
|
||||
} else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) {
|
||||
/* This sounds much better than CNG */
|
||||
if (ec->clean_nlp > ec->Lbgn)
|
||||
ec->clean_nlp = ec->Lbgn;
|
||||
if (ec->clean_nlp < -ec->Lbgn)
|
||||
ec->clean_nlp = -ec->Lbgn;
|
||||
} else {
|
||||
/* just mute the residual, doesn't sound very good, used mainly
|
||||
in G168 tests */
|
||||
ec->clean_nlp = 0;
|
||||
}
|
||||
} else {
|
||||
/* Background noise estimator. I tried a few algorithms
|
||||
here without much luck. This very simple one seems to
|
||||
work best, we just average the level using a slow (1 sec
|
||||
time const) filter if the current level is less than a
|
||||
(experimentally derived) constant. This means we dont
|
||||
include high level signals like near end speech. When
|
||||
combined with CNG or especially CLIP seems to work OK.
|
||||
*/
|
||||
if (ec->Lclean < 40) {
|
||||
ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn;
|
||||
ec->Lbgn = (ec->Lbgn_acc + (1 << 11)) >> 12;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Roll around the taps buffer */
|
||||
if (ec->curr_pos <= 0)
|
||||
ec->curr_pos = ec->taps;
|
||||
ec->curr_pos--;
|
||||
|
||||
if (ec->adaption_mode & ECHO_CAN_DISABLE)
|
||||
ec->clean_nlp = rx;
|
||||
|
||||
/* Output scaled back up again to match input scaling */
|
||||
|
||||
return (int16_t) ec->clean_nlp << 1;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(oslec_update);
|
||||
|
||||
/* This function is seperated from the echo canceller is it is usually called
|
||||
|
@ -604,28 +605,32 @@ EXPORT_SYMBOL_GPL(oslec_update);
|
|||
precision, which noise shapes things, giving very clean DC removal.
|
||||
*/
|
||||
|
||||
int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx) {
|
||||
int tmp, tmp1;
|
||||
int16_t oslec_hpf_tx(struct oslec_state * ec, int16_t tx)
|
||||
{
|
||||
int tmp, tmp1;
|
||||
|
||||
if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
|
||||
tmp = tx << 15;
|
||||
if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
|
||||
tmp = tx << 15;
|
||||
#if 1
|
||||
/* Make sure the gain of the HPF is 1.0. The first can still saturate a little under
|
||||
impulse conditions, and it might roll to 32768 and need clipping on sustained peak
|
||||
level signals. However, the scale of such clipping is small, and the error due to
|
||||
any saturation should not markedly affect the downstream processing. */
|
||||
tmp -= (tmp >> 4);
|
||||
/* Make sure the gain of the HPF is 1.0. The first can still saturate a little under
|
||||
impulse conditions, and it might roll to 32768 and need clipping on sustained peak
|
||||
level signals. However, the scale of such clipping is small, and the error due to
|
||||
any saturation should not markedly affect the downstream processing. */
|
||||
tmp -= (tmp >> 4);
|
||||
#endif
|
||||
ec->tx_1 += -(ec->tx_1>>DC_LOG2BETA) + tmp - ec->tx_2;
|
||||
tmp1 = ec->tx_1 >> 15;
|
||||
if (tmp1 > 32767) tmp1 = 32767;
|
||||
if (tmp1 < -32767) tmp1 = -32767;
|
||||
tx = tmp1;
|
||||
ec->tx_2 = tmp;
|
||||
}
|
||||
ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2;
|
||||
tmp1 = ec->tx_1 >> 15;
|
||||
if (tmp1 > 32767)
|
||||
tmp1 = 32767;
|
||||
if (tmp1 < -32767)
|
||||
tmp1 = -32767;
|
||||
tx = tmp1;
|
||||
ec->tx_2 = tmp;
|
||||
}
|
||||
|
||||
return tx;
|
||||
return tx;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(oslec_hpf_tx);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
|
|
|
@ -124,9 +124,8 @@ a minor burden.
|
|||
G.168 echo canceller descriptor. This defines the working state for a line
|
||||
echo canceller.
|
||||
*/
|
||||
struct oslec_state
|
||||
{
|
||||
int16_t tx,rx;
|
||||
struct oslec_state {
|
||||
int16_t tx, rx;
|
||||
int16_t clean;
|
||||
int16_t clean_nlp;
|
||||
|
||||
|
@ -170,4 +169,4 @@ struct oslec_state
|
|||
int16_t *snapshot;
|
||||
};
|
||||
|
||||
#endif /* __ECHO_H */
|
||||
#endif /* __ECHO_H */
|
||||
|
|
|
@ -72,8 +72,7 @@
|
|||
16 bit integer FIR descriptor. This defines the working state for a single
|
||||
instance of an FIR filter using 16 bit integer coefficients.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
typedef struct {
|
||||
int taps;
|
||||
int curr_pos;
|
||||
const int16_t *coeffs;
|
||||
|
@ -85,8 +84,7 @@ typedef struct
|
|||
instance of an FIR filter using 32 bit integer coefficients, and filtering
|
||||
16 bit integer data.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
typedef struct {
|
||||
int taps;
|
||||
int curr_pos;
|
||||
const int32_t *coeffs;
|
||||
|
@ -97,39 +95,37 @@ typedef struct
|
|||
Floating point FIR descriptor. This defines the working state for a single
|
||||
instance of an FIR filter using floating point coefficients and data.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
typedef struct {
|
||||
int taps;
|
||||
int curr_pos;
|
||||
const float *coeffs;
|
||||
float *history;
|
||||
} fir_float_state_t;
|
||||
|
||||
static __inline__ const int16_t *fir16_create(fir16_state_t *fir,
|
||||
const int16_t *coeffs,
|
||||
int taps)
|
||||
static __inline__ const int16_t *fir16_create(fir16_state_t * fir,
|
||||
const int16_t * coeffs, int taps)
|
||||
{
|
||||
fir->taps = taps;
|
||||
fir->curr_pos = taps - 1;
|
||||
fir->coeffs = coeffs;
|
||||
#if defined(USE_MMX) || defined(USE_SSE2) || defined(__bfin__)
|
||||
fir->history = kcalloc(2*taps, sizeof(int16_t), GFP_KERNEL);
|
||||
fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL);
|
||||
#else
|
||||
fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
|
||||
#endif
|
||||
return fir->history;
|
||||
}
|
||||
|
||||
static __inline__ void fir16_flush(fir16_state_t *fir)
|
||||
static __inline__ void fir16_flush(fir16_state_t * fir)
|
||||
{
|
||||
#if defined(USE_MMX) || defined(USE_SSE2) || defined(__bfin__)
|
||||
memset(fir->history, 0, 2*fir->taps*sizeof(int16_t));
|
||||
memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t));
|
||||
#else
|
||||
memset(fir->history, 0, fir->taps*sizeof(int16_t));
|
||||
memset(fir->history, 0, fir->taps * sizeof(int16_t));
|
||||
#endif
|
||||
}
|
||||
|
||||
static __inline__ void fir16_free(fir16_state_t *fir)
|
||||
static __inline__ void fir16_free(fir16_state_t * fir)
|
||||
{
|
||||
kfree(fir->history);
|
||||
}
|
||||
|
@ -137,166 +133,162 @@ static __inline__ void fir16_free(fir16_state_t *fir)
|
|||
#ifdef __bfin__
|
||||
static inline int32_t dot_asm(short *x, short *y, int len)
|
||||
{
|
||||
int dot;
|
||||
int dot;
|
||||
|
||||
len--;
|
||||
len--;
|
||||
|
||||
__asm__
|
||||
(
|
||||
"I0 = %1;\n\t"
|
||||
"I1 = %2;\n\t"
|
||||
"A0 = 0;\n\t"
|
||||
"R0.L = W[I0++] || R1.L = W[I1++];\n\t"
|
||||
"LOOP dot%= LC0 = %3;\n\t"
|
||||
"LOOP_BEGIN dot%=;\n\t"
|
||||
"A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t"
|
||||
"LOOP_END dot%=;\n\t"
|
||||
"A0 += R0.L*R1.L (IS);\n\t"
|
||||
"R0 = A0;\n\t"
|
||||
"%0 = R0;\n\t"
|
||||
: "=&d" (dot)
|
||||
: "a" (x), "a" (y), "a" (len)
|
||||
: "I0", "I1", "A1", "A0", "R0", "R1"
|
||||
);
|
||||
__asm__("I0 = %1;\n\t"
|
||||
"I1 = %2;\n\t"
|
||||
"A0 = 0;\n\t"
|
||||
"R0.L = W[I0++] || R1.L = W[I1++];\n\t"
|
||||
"LOOP dot%= LC0 = %3;\n\t"
|
||||
"LOOP_BEGIN dot%=;\n\t"
|
||||
"A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t"
|
||||
"LOOP_END dot%=;\n\t"
|
||||
"A0 += R0.L*R1.L (IS);\n\t"
|
||||
"R0 = A0;\n\t"
|
||||
"%0 = R0;\n\t"
|
||||
:"=&d"(dot)
|
||||
:"a"(x), "a"(y), "a"(len)
|
||||
:"I0", "I1", "A1", "A0", "R0", "R1"
|
||||
);
|
||||
|
||||
return dot;
|
||||
return dot;
|
||||
}
|
||||
#endif
|
||||
|
||||
static __inline__ int16_t fir16(fir16_state_t *fir, int16_t sample)
|
||||
static __inline__ int16_t fir16(fir16_state_t * fir, int16_t sample)
|
||||
{
|
||||
int32_t y;
|
||||
int32_t y;
|
||||
#if defined(USE_MMX)
|
||||
int i;
|
||||
mmx_t *mmx_coeffs;
|
||||
mmx_t *mmx_hist;
|
||||
int i;
|
||||
mmx_t *mmx_coeffs;
|
||||
mmx_t *mmx_hist;
|
||||
|
||||
fir->history[fir->curr_pos] = sample;
|
||||
fir->history[fir->curr_pos + fir->taps] = sample;
|
||||
fir->history[fir->curr_pos] = sample;
|
||||
fir->history[fir->curr_pos + fir->taps] = sample;
|
||||
|
||||
mmx_coeffs = (mmx_t *) fir->coeffs;
|
||||
mmx_hist = (mmx_t *) &fir->history[fir->curr_pos];
|
||||
i = fir->taps;
|
||||
pxor_r2r(mm4, mm4);
|
||||
/* 8 samples per iteration, so the filter must be a multiple of 8 long. */
|
||||
while (i > 0)
|
||||
{
|
||||
movq_m2r(mmx_coeffs[0], mm0);
|
||||
movq_m2r(mmx_coeffs[1], mm2);
|
||||
movq_m2r(mmx_hist[0], mm1);
|
||||
movq_m2r(mmx_hist[1], mm3);
|
||||
mmx_coeffs += 2;
|
||||
mmx_hist += 2;
|
||||
pmaddwd_r2r(mm1, mm0);
|
||||
pmaddwd_r2r(mm3, mm2);
|
||||
paddd_r2r(mm0, mm4);
|
||||
paddd_r2r(mm2, mm4);
|
||||
i -= 8;
|
||||
}
|
||||
movq_r2r(mm4, mm0);
|
||||
psrlq_i2r(32, mm0);
|
||||
paddd_r2r(mm0, mm4);
|
||||
movd_r2m(mm4, y);
|
||||
emms();
|
||||
mmx_coeffs = (mmx_t *) fir->coeffs;
|
||||
mmx_hist = (mmx_t *) & fir->history[fir->curr_pos];
|
||||
i = fir->taps;
|
||||
pxor_r2r(mm4, mm4);
|
||||
/* 8 samples per iteration, so the filter must be a multiple of 8 long. */
|
||||
while (i > 0) {
|
||||
movq_m2r(mmx_coeffs[0], mm0);
|
||||
movq_m2r(mmx_coeffs[1], mm2);
|
||||
movq_m2r(mmx_hist[0], mm1);
|
||||
movq_m2r(mmx_hist[1], mm3);
|
||||
mmx_coeffs += 2;
|
||||
mmx_hist += 2;
|
||||
pmaddwd_r2r(mm1, mm0);
|
||||
pmaddwd_r2r(mm3, mm2);
|
||||
paddd_r2r(mm0, mm4);
|
||||
paddd_r2r(mm2, mm4);
|
||||
i -= 8;
|
||||
}
|
||||
movq_r2r(mm4, mm0);
|
||||
psrlq_i2r(32, mm0);
|
||||
paddd_r2r(mm0, mm4);
|
||||
movd_r2m(mm4, y);
|
||||
emms();
|
||||
#elif defined(USE_SSE2)
|
||||
int i;
|
||||
xmm_t *xmm_coeffs;
|
||||
xmm_t *xmm_hist;
|
||||
int i;
|
||||
xmm_t *xmm_coeffs;
|
||||
xmm_t *xmm_hist;
|
||||
|
||||
fir->history[fir->curr_pos] = sample;
|
||||
fir->history[fir->curr_pos + fir->taps] = sample;
|
||||
fir->history[fir->curr_pos] = sample;
|
||||
fir->history[fir->curr_pos + fir->taps] = sample;
|
||||
|
||||
xmm_coeffs = (xmm_t *) fir->coeffs;
|
||||
xmm_hist = (xmm_t *) &fir->history[fir->curr_pos];
|
||||
i = fir->taps;
|
||||
pxor_r2r(xmm4, xmm4);
|
||||
/* 16 samples per iteration, so the filter must be a multiple of 16 long. */
|
||||
while (i > 0)
|
||||
{
|
||||
movdqu_m2r(xmm_coeffs[0], xmm0);
|
||||
movdqu_m2r(xmm_coeffs[1], xmm2);
|
||||
movdqu_m2r(xmm_hist[0], xmm1);
|
||||
movdqu_m2r(xmm_hist[1], xmm3);
|
||||
xmm_coeffs += 2;
|
||||
xmm_hist += 2;
|
||||
pmaddwd_r2r(xmm1, xmm0);
|
||||
pmaddwd_r2r(xmm3, xmm2);
|
||||
paddd_r2r(xmm0, xmm4);
|
||||
paddd_r2r(xmm2, xmm4);
|
||||
i -= 16;
|
||||
}
|
||||
movdqa_r2r(xmm4, xmm0);
|
||||
psrldq_i2r(8, xmm0);
|
||||
paddd_r2r(xmm0, xmm4);
|
||||
movdqa_r2r(xmm4, xmm0);
|
||||
psrldq_i2r(4, xmm0);
|
||||
paddd_r2r(xmm0, xmm4);
|
||||
movd_r2m(xmm4, y);
|
||||
xmm_coeffs = (xmm_t *) fir->coeffs;
|
||||
xmm_hist = (xmm_t *) & fir->history[fir->curr_pos];
|
||||
i = fir->taps;
|
||||
pxor_r2r(xmm4, xmm4);
|
||||
/* 16 samples per iteration, so the filter must be a multiple of 16 long. */
|
||||
while (i > 0) {
|
||||
movdqu_m2r(xmm_coeffs[0], xmm0);
|
||||
movdqu_m2r(xmm_coeffs[1], xmm2);
|
||||
movdqu_m2r(xmm_hist[0], xmm1);
|
||||
movdqu_m2r(xmm_hist[1], xmm3);
|
||||
xmm_coeffs += 2;
|
||||
xmm_hist += 2;
|
||||
pmaddwd_r2r(xmm1, xmm0);
|
||||
pmaddwd_r2r(xmm3, xmm2);
|
||||
paddd_r2r(xmm0, xmm4);
|
||||
paddd_r2r(xmm2, xmm4);
|
||||
i -= 16;
|
||||
}
|
||||
movdqa_r2r(xmm4, xmm0);
|
||||
psrldq_i2r(8, xmm0);
|
||||
paddd_r2r(xmm0, xmm4);
|
||||
movdqa_r2r(xmm4, xmm0);
|
||||
psrldq_i2r(4, xmm0);
|
||||
paddd_r2r(xmm0, xmm4);
|
||||
movd_r2m(xmm4, y);
|
||||
#elif defined(__bfin__)
|
||||
fir->history[fir->curr_pos] = sample;
|
||||
fir->history[fir->curr_pos + fir->taps] = sample;
|
||||
y = dot_asm((int16_t*)fir->coeffs, &fir->history[fir->curr_pos], fir->taps);
|
||||
fir->history[fir->curr_pos] = sample;
|
||||
fir->history[fir->curr_pos + fir->taps] = sample;
|
||||
y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos],
|
||||
fir->taps);
|
||||
#else
|
||||
int i;
|
||||
int offset1;
|
||||
int offset2;
|
||||
int i;
|
||||
int offset1;
|
||||
int offset2;
|
||||
|
||||
fir->history[fir->curr_pos] = sample;
|
||||
fir->history[fir->curr_pos] = sample;
|
||||
|
||||
offset2 = fir->curr_pos;
|
||||
offset1 = fir->taps - offset2;
|
||||
y = 0;
|
||||
for (i = fir->taps - 1; i >= offset1; i--)
|
||||
y += fir->coeffs[i]*fir->history[i - offset1];
|
||||
for ( ; i >= 0; i--)
|
||||
y += fir->coeffs[i]*fir->history[i + offset2];
|
||||
offset2 = fir->curr_pos;
|
||||
offset1 = fir->taps - offset2;
|
||||
y = 0;
|
||||
for (i = fir->taps - 1; i >= offset1; i--)
|
||||
y += fir->coeffs[i] * fir->history[i - offset1];
|
||||
for (; i >= 0; i--)
|
||||
y += fir->coeffs[i] * fir->history[i + offset2];
|
||||
#endif
|
||||
if (fir->curr_pos <= 0)
|
||||
fir->curr_pos = fir->taps;
|
||||
fir->curr_pos--;
|
||||
return (int16_t) (y >> 15);
|
||||
if (fir->curr_pos <= 0)
|
||||
fir->curr_pos = fir->taps;
|
||||
fir->curr_pos--;
|
||||
return (int16_t) (y >> 15);
|
||||
}
|
||||
|
||||
static __inline__ const int16_t *fir32_create(fir32_state_t *fir,
|
||||
const int32_t *coeffs,
|
||||
int taps)
|
||||
static __inline__ const int16_t *fir32_create(fir32_state_t * fir,
|
||||
const int32_t * coeffs, int taps)
|
||||
{
|
||||
fir->taps = taps;
|
||||
fir->curr_pos = taps - 1;
|
||||
fir->coeffs = coeffs;
|
||||
fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
|
||||
return fir->history;
|
||||
fir->taps = taps;
|
||||
fir->curr_pos = taps - 1;
|
||||
fir->coeffs = coeffs;
|
||||
fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
|
||||
return fir->history;
|
||||
}
|
||||
|
||||
static __inline__ void fir32_flush(fir32_state_t *fir)
|
||||
static __inline__ void fir32_flush(fir32_state_t * fir)
|
||||
{
|
||||
memset(fir->history, 0, fir->taps*sizeof(int16_t));
|
||||
memset(fir->history, 0, fir->taps * sizeof(int16_t));
|
||||
}
|
||||
|
||||
static __inline__ void fir32_free(fir32_state_t *fir)
|
||||
static __inline__ void fir32_free(fir32_state_t * fir)
|
||||
{
|
||||
kfree(fir->history);
|
||||
kfree(fir->history);
|
||||
}
|
||||
|
||||
static __inline__ int16_t fir32(fir32_state_t *fir, int16_t sample)
|
||||
static __inline__ int16_t fir32(fir32_state_t * fir, int16_t sample)
|
||||
{
|
||||
int i;
|
||||
int32_t y;
|
||||
int offset1;
|
||||
int offset2;
|
||||
int i;
|
||||
int32_t y;
|
||||
int offset1;
|
||||
int offset2;
|
||||
|
||||
fir->history[fir->curr_pos] = sample;
|
||||
offset2 = fir->curr_pos;
|
||||
offset1 = fir->taps - offset2;
|
||||
y = 0;
|
||||
for (i = fir->taps - 1; i >= offset1; i--)
|
||||
y += fir->coeffs[i]*fir->history[i - offset1];
|
||||
for ( ; i >= 0; i--)
|
||||
y += fir->coeffs[i]*fir->history[i + offset2];
|
||||
if (fir->curr_pos <= 0)
|
||||
fir->curr_pos = fir->taps;
|
||||
fir->curr_pos--;
|
||||
return (int16_t) (y >> 15);
|
||||
fir->history[fir->curr_pos] = sample;
|
||||
offset2 = fir->curr_pos;
|
||||
offset1 = fir->taps - offset2;
|
||||
y = 0;
|
||||
for (i = fir->taps - 1; i >= offset1; i--)
|
||||
y += fir->coeffs[i] * fir->history[i - offset1];
|
||||
for (; i >= 0; i--)
|
||||
y += fir->coeffs[i] * fir->history[i + offset2];
|
||||
if (fir->curr_pos <= 0)
|
||||
fir->curr_pos = fir->taps;
|
||||
fir->curr_pos--;
|
||||
return (int16_t) (y >> 15);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -27,24 +27,23 @@
|
|||
* values by ULL, lest they be truncated by the compiler)
|
||||
*/
|
||||
|
||||
typedef union {
|
||||
long long q; /* Quadword (64-bit) value */
|
||||
unsigned long long uq; /* Unsigned Quadword */
|
||||
int d[2]; /* 2 Doubleword (32-bit) values */
|
||||
unsigned int ud[2]; /* 2 Unsigned Doubleword */
|
||||
short w[4]; /* 4 Word (16-bit) values */
|
||||
unsigned short uw[4]; /* 4 Unsigned Word */
|
||||
char b[8]; /* 8 Byte (8-bit) values */
|
||||
unsigned char ub[8]; /* 8 Unsigned Byte */
|
||||
float s[2]; /* Single-precision (32-bit) value */
|
||||
} mmx_t; /* On an 8-byte (64-bit) boundary */
|
||||
typedef union {
|
||||
long long q; /* Quadword (64-bit) value */
|
||||
unsigned long long uq; /* Unsigned Quadword */
|
||||
int d[2]; /* 2 Doubleword (32-bit) values */
|
||||
unsigned int ud[2]; /* 2 Unsigned Doubleword */
|
||||
short w[4]; /* 4 Word (16-bit) values */
|
||||
unsigned short uw[4]; /* 4 Unsigned Word */
|
||||
char b[8]; /* 8 Byte (8-bit) values */
|
||||
unsigned char ub[8]; /* 8 Unsigned Byte */
|
||||
float s[2]; /* Single-precision (32-bit) value */
|
||||
} mmx_t; /* On an 8-byte (64-bit) boundary */
|
||||
|
||||
/* SSE registers */
|
||||
typedef union {
|
||||
char b[16];
|
||||
} xmm_t;
|
||||
|
||||
|
||||
#define mmx_i2r(op,imm,reg) \
|
||||
__asm__ __volatile__ (#op " %0, %%" #reg \
|
||||
: /* nothing */ \
|
||||
|
@ -63,7 +62,6 @@ typedef union {
|
|||
#define mmx_r2r(op,regs,regd) \
|
||||
__asm__ __volatile__ (#op " %" #regs ", %" #regd)
|
||||
|
||||
|
||||
#define emms() __asm__ __volatile__ ("emms")
|
||||
|
||||
#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
|
||||
|
@ -192,16 +190,13 @@ typedef union {
|
|||
#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg)
|
||||
#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd)
|
||||
|
||||
|
||||
/* 3DNOW extensions */
|
||||
|
||||
#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg)
|
||||
#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd)
|
||||
|
||||
|
||||
/* AMD MMX extensions - also available in intel SSE */
|
||||
|
||||
|
||||
#define mmx_m2ri(op,mem,reg,imm) \
|
||||
__asm__ __volatile__ (#op " %1, %0, %%" #reg \
|
||||
: /* nothing */ \
|
||||
|
@ -216,7 +211,6 @@ typedef union {
|
|||
: /* nothing */ \
|
||||
: "m" (mem))
|
||||
|
||||
|
||||
#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg)
|
||||
|
||||
#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var)
|
||||
|
@ -284,5 +278,4 @@ typedef union {
|
|||
#define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd)
|
||||
#define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd)
|
||||
|
||||
|
||||
#endif /* AVCODEC_I386MMX_H */
|
||||
|
|
|
@ -83,4 +83,4 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx);
|
|||
*/
|
||||
int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx);
|
||||
|
||||
#endif /* __OSLEC_H */
|
||||
#endif /* __OSLEC_H */
|
||||
|
|
Loading…
Reference in a new issue