Skip to content

Commit

Permalink
Implement sum_sqr_shift() using two passes with no branch inside the loops
Browse files Browse the repository at this point in the history

Slightly slower on x86, about the same speed on ARMv7, should be faster on
DSPs.
  • Loading branch information
jmvalin committed Jul 17, 2016
1 parent fe4d91c commit 7c64560
Showing 1 changed file with 24 additions and 27 deletions.
51 changes: 24 additions & 27 deletions silk/sum_sqr_shift.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,43 +41,40 @@ void silk_sum_sqr_shift(
)
{
opus_int i, shft;
opus_int32 nrg_tmp, nrg;
opus_uint32 nrg_tmp;
opus_int32 nrg;

nrg = 0;
shft = 0;
len--;
for( i = 0; i < len; i += 2 ) {
nrg = silk_SMLABB_ovflw( nrg, x[ i ], x[ i ] );
nrg = silk_SMLABB_ovflw( nrg, x[ i + 1 ], x[ i + 1 ] );
if( nrg < 0 ) {
/* Scale down */
nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
shft = 2;
i+=2;
break;
}
/* Do a first run with the maximum shift we could have. */
shft = 31-silk_CLZ32(len);
/* Let's be conservative with rounding and start with nrg=len. */
nrg = len;
for( i = 0; i < len - 1; i += 2 ) {
nrg_tmp = silk_SMULBB( x[ i ], x[ i ] );
nrg_tmp = silk_SMLABB_ovflw( nrg_tmp, x[ i + 1 ], x[ i + 1 ] );
nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft );
}
for( ; i < len; i += 2 ) {
if( i < len ) {
/* One sample left to process */
nrg_tmp = silk_SMULBB( x[ i ], x[ i ] );
nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft );
}
silk_assert( nrg >= 0 );
/* Make sure the result will fit in a 32-bit signed integer with two bits
of headroom. */
shft = silk_max_32(0, shft+3 - silk_CLZ32(nrg));
nrg = 0;
for( i = 0 ; i < len - 1; i += 2 ) {
nrg_tmp = silk_SMULBB( x[ i ], x[ i ] );
nrg_tmp = silk_SMLABB_ovflw( nrg_tmp, x[ i + 1 ], x[ i + 1 ] );
nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, (opus_uint32)nrg_tmp, shft );
if( nrg < 0 ) {
/* Scale down */
nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
shft += 2;
}
nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft );
}
if( i == len ) {
if( i < len ) {
/* One sample left to process */
nrg_tmp = silk_SMULBB( x[ i ], x[ i ] );
nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft );
}

/* Make sure to have at least one extra leading zero (two leading zeros in total) */
if( nrg & 0xC0000000 ) {
nrg = silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
shft += 2;
}
silk_assert( nrg >= 0 );

/* Output arguments */
*shift = shft;
Expand Down

0 comments on commit 7c64560

Please sign in to comment.