Skip to content

Commit 702e5b2

Browse files
feature calculation for BWE added
1 parent 0eaa9dc commit 702e5b2

File tree

6 files changed

+206
-3
lines changed

6 files changed

+206
-3
lines changed

dnn/bwe_demo.c

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "lpcnet.h"
3737
#include "os_support.h"
3838
#include "cpu_support.h"
39+
#include "osce_features.h"
3940

4041

4142
void usage(void) {
@@ -60,7 +61,40 @@ int main(int argc, char **argv) {
6061
exit(1);
6162
}
6263

63-
printf("BWE coming soon!\n");
64+
/* "%%" prints a literal '%'; a bare '%' would start an (invalid) conversion specification */
printf("Feature calculation with signal (100 * (n %% 90)) - 8900\n");
65+
int n = 0, i;
66+
opus_int16 frame[160];
67+
int frame_counter = 0;
68+
float features[32 + 2 * 41];
69+
70+
for (frame_counter = 0; frame_counter < 10; frame_counter ++)
71+
{
72+
for (i = 0; i < 160; i ++ )
73+
{
74+
frame[i] = 100 * n++ - 8900;
75+
n = n % 90;
76+
}
77+
78+
osce_bwe_calculate_features(features, frame, 160);
79+
80+
printf("frame[%d]\n", frame_counter);
81+
printf("lmspec: ");
82+
for (i = 0; i < 32; i ++)
83+
{
84+
printf(" %f ", features[i]);
85+
}
86+
printf("\nphasediff: ");
87+
for (;i < 32 + 2 * 41; i ++)
88+
{
89+
printf(" %f ", features[i]);
90+
}
91+
printf("\n\n");
92+
93+
fwrite(frame, sizeof(frame[0]), 160, fout);
94+
95+
}
96+
97+
6498

6599
fclose(fin);
66100
fclose(fout);

dnn/osce_features.c

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,13 @@ static const int center_bins_noisy[18] = {
6868
136, 160
6969
};
7070

71+
/* Center bins of the 32 bands handed to apply_filterbank() by
 * osce_bwe_calculate_features(): one band every 5 bins from bin 0 up to
 * bin 150, followed by a last band centered on bin 160. */
static const int center_bins_bwe[32] = {
      0,   5,  10,  15,
     20,  25,  30,  35,
     40,  45,  50,  55,
     60,  65,  70,  75,
     80,  85,  90,  95,
    100, 105, 110, 115,
    120, 125, 130, 135,
    140, 145, 150, 160
};
77+
7178
static const float band_weights_clean[64] = {
7279
0.666666666667f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
7380
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
@@ -95,6 +102,17 @@ static const float band_weights_noisy[18] = {
95102
0.041666666667f, 0.080000000000f
96103
};
97104

105+
/* Per-band weights handed to apply_filterbank() together with center_bins_bwe.
 * The literals carry an 'f' suffix, like the other band weight tables in this
 * file, so that no implicit double-to-float conversion takes place.
 * NOTE(review): presumably 1 / (sum of filter taps) per band, as printed by
 * the accompanying Python helper script -- confirm against the trained model. */
static const float band_weights_bwe[32] = {
    0.333333333f, 0.200000000f, 0.200000000f, 0.200000000f,
    0.200000000f, 0.200000000f, 0.200000000f, 0.200000000f,
    0.200000000f, 0.200000000f, 0.200000000f, 0.200000000f,
    0.200000000f, 0.200000000f, 0.200000000f, 0.200000000f,
    0.200000000f, 0.200000000f, 0.200000000f, 0.200000000f,
    0.200000000f, 0.200000000f, 0.200000000f, 0.200000000f,
    0.200000000f, 0.200000000f, 0.200000000f, 0.200000000f,
    0.200000000f, 0.200000000f, 0.133333333f, 0.181818182f
};
115+
98116
static float osce_window[OSCE_SPEC_WINDOW_SIZE] = {
99117
0.004908718808f, 0.014725683311f, 0.024541228523f, 0.034354408400f, 0.044164277127f,
100118
0.053969889210f, 0.063770299562f, 0.073564563600f, 0.083351737332f, 0.093130877450f,
@@ -440,6 +458,98 @@ void osce_calculate_features(
440458
}
441459

442460

461+
/* Highest FFT bin for which phase-difference features are computed */
#define OSCE_BWE_MAX_INSTAFREQ_BIN  40
/* Number of new samples consumed per call; the analysis window is twice as long */
#define OSCE_BWE_HALF_WINDOW_SIZE   160
#define OSCE_BWE_WINDOW_SIZE        (2 * (OSCE_BWE_HALF_WINDOW_SIZE))
/* Number of log-magnitude filterbank bands */
#define OSCE_BWE_NUM_BANDS          32
465+
466+
void osce_bwe_calculate_features(
467+
float *features, /* O input features */
468+
const opus_int16 xq[], /* I Decoded speech */
469+
int num_samples /* I number of input samples */
470+
)
471+
{
472+
static int init;
473+
static float signal_history[OSCE_BWE_HALF_WINDOW_SIZE];
474+
static float last_spec[2 * OSCE_BWE_MAX_INSTAFREQ_BIN + 2] = {0};
475+
476+
int n, k;
477+
kiss_fft_cpx fft_buffer[OSCE_BWE_WINDOW_SIZE];
478+
float spec[2 * OSCE_BWE_MAX_INSTAFREQ_BIN + 2];
479+
float buffer[OSCE_BWE_WINDOW_SIZE];
480+
float mag_spec[OSCE_SPEC_NUM_FREQS];
481+
float *lmspec, *instafreq;
482+
483+
/* OSCE_BWE_WINDOW_SIZE == 320 is a hard requirement */
484+
celt_assert(num_samples == OSCE_BWE_HALF_WINDOW_SIZE && OSCE_BWE_WINDOW_SIZE == 320);
485+
486+
if (init == 0)
487+
{
488+
/* ToDo: fix python and retrain */
489+
for (k = 0; k <= OSCE_BWE_MAX_INSTAFREQ_BIN; k ++)
490+
{
491+
last_spec[2*k] = 1e-9;
492+
}
493+
init = 1;
494+
}
495+
496+
lmspec = features;
497+
instafreq = features + OSCE_BWE_NUM_BANDS;
498+
499+
OPUS_COPY(buffer, signal_history, OSCE_BWE_HALF_WINDOW_SIZE);
500+
for (n = 0; n < num_samples; n++)
501+
{
502+
buffer[n + OSCE_BWE_HALF_WINDOW_SIZE] = (float) xq[n] / (1U<<15);
503+
}
504+
505+
/* update signal history buffer */
506+
OPUS_COPY(signal_history, buffer + OSCE_BWE_HALF_WINDOW_SIZE, OSCE_BWE_HALF_WINDOW_SIZE);
507+
508+
/* apply window */
509+
for (n = 0; n < OSCE_BWE_WINDOW_SIZE; n ++)
510+
{
511+
buffer[n] *= osce_window[n];
512+
}
513+
514+
/* DFT */
515+
forward_transform(fft_buffer, buffer);
516+
517+
/* instafreq */
518+
for (k = 0; k <= OSCE_BWE_MAX_INSTAFREQ_BIN; k++)
519+
{
520+
521+
float aux_r, aux_i, aux_abs;
522+
float re1, re2, im1, im2;
523+
spec[2*k] = OSCE_BWE_WINDOW_SIZE * fft_buffer[k].r + 1e-9; /* ToDo: remove 1e-9 from python code*/
524+
spec[2*k+1] = OSCE_BWE_WINDOW_SIZE * fft_buffer[k].i;
525+
re1 = spec[2*k];
526+
im1 = spec[2*k+1];
527+
re2 = last_spec[2*k];
528+
im2 = last_spec[2*k+1];
529+
aux_r = re1 * re2 + im1 * im2;
530+
aux_i = im1 * re2 - re1 * im2;
531+
aux_abs = sqrt(aux_r * aux_r + aux_i * aux_i);
532+
instafreq[k] = aux_r / (aux_abs + 1e-9);
533+
instafreq[k + OSCE_BWE_MAX_INSTAFREQ_BIN + 1] = aux_i / (aux_abs + 1e-9);
534+
}
535+
536+
/* erb-scale magnitude spectrogram */
537+
for (k = 0; k < OSCE_SPEC_NUM_FREQS; k ++)
538+
{
539+
mag_spec[k] = OSCE_BWE_WINDOW_SIZE * sqrt(fft_buffer[k].r * fft_buffer[k].r + fft_buffer[k].i * fft_buffer[k].i);
540+
}
541+
542+
apply_filterbank(lmspec, mag_spec, center_bins_bwe, band_weights_bwe, OSCE_BWE_NUM_BANDS);
543+
544+
for (k = 0; k < OSCE_BWE_NUM_BANDS; k++)
545+
{
546+
lmspec[k] = log(lmspec[k] + 1e-9);
547+
}
548+
549+
/* update instafreq buffer */
550+
OPUS_COPY(last_spec, spec, 2 * OSCE_BWE_MAX_INSTAFREQ_BIN + 2);
551+
}
552+
443553
void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length)
444554
{
445555
int i;

dnn/osce_features.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ void osce_calculate_features(
4444
opus_int32 num_bits /* I Size of SILK payload in bits */
4545
);
4646

47+
void osce_bwe_calculate_features(
48+
float *features, /* O input features */
49+
const opus_int16 xq[], /* I Decoded speech */
50+
int num_samples /* I number of input samples */
51+
);
4752

4853
void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length);
4954

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import argparse
2+
import sys
3+
sys.path.append('./')
4+
5+
import torch
6+
from utils.spec import create_filter_bank
7+
import numpy as np
8+
9+
10+
# Command line: one positional argument naming the checkpoint file.
parser = argparse.ArgumentParser()
parser.add_argument('checkpoint', type=str)


if __name__ == "__main__":
    args = parser.parse_args()

    # The number of bands is read from the setup stored in the checkpoint.
    checkpoint = torch.load(args.checkpoint, map_location='cpu')
    num_bands = checkpoint['setup']['data']['spec_num_bands']

    filter_bank, center_bins = create_filter_bank(
        num_bands,
        n_fft=320,
        fs=16000,
        scale='erb',
        round_center_bins=True,
        normalize=False,
        return_center_bins=True,
    )
    # One weight per band: reciprocal of the sum over the last axis.
    band_weights = 1 / filter_bank.sum(axis=-1)

    # Both lists are printed comma separated on a single line each.
    print("center_bins:")
    print("".join(f"{int(cb):4d}," for cb in center_bins))

    print("band_weights:")
    print("".join(f" {w:1.9f}," for w in band_weights))
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import argparse
2+
import sys
3+
sys.path.append('./')
4+
5+
import torch
6+
from utils.bwe_features import load_inference_data
7+
import numpy as np
8+
9+
# Command line: one positional argument naming the test signal.
parser = argparse.ArgumentParser()
parser.add_argument('testsignal', type=str)


if __name__ == "__main__":
    args = parser.parse_args()

    # Only the second element returned by load_inference_data is used.
    _, features = load_inference_data(args.testsignal)

    # Print each frame: the first 32 values, then the remaining ones.
    num_frames = features.shape[0]
    for frame_idx in range(num_frames):
        print(f"frame[{frame_idx}]")
        print(f"lmspec: {features[frame_idx, :32]}")
        print(f"freqs: {features[frame_idx,32:]}")

dnn/torch/osce/utils/spec.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def gen_filterbank(N, Fs=16000, keep_size=False):
6565
RE = RE/norm[:, np.newaxis]
6666
return torch.from_numpy(RE)
6767

68-
def create_filter_bank(num_bands, n_fft=320, fs=16000, scale='bark', round_center_bins=False, return_upper=False, normalize=False):
68+
def create_filter_bank(num_bands, n_fft=320, fs=16000, scale='bark', round_center_bins=False, return_upper=False, normalize=False, return_center_bins=False):
6969

7070
f0 = 0
7171
num_bins = n_fft // 2 + 1
@@ -111,6 +111,9 @@ def create_filter_bank(num_bands, n_fft=320, fs=16000, scale='bark', round_cente
111111
if normalize:
112112
filter_bank = filter_bank / np.sum(filter_bank, axis=1).reshape(-1, 1)
113113

114+
if return_center_bins:
115+
return filter_bank, center_bins
116+
114117
return filter_bank
115118

116119

@@ -232,7 +235,7 @@ def instafreq(x, frame_size, max_bin, window=None):
232235
X = np.fft.fft(x_unfold, n=frame_size, axis=-1)
233236

234237
# instantaneous frequency
235-
X_trunc = X[..., :max_bin + 1] + 1e-9
238+
X_trunc = X[..., :max_bin + 1] + 1e-9
236239
Y = X_trunc[1:] * np.conj(X_trunc[:-1])
237240
Y = Y / (np.abs(Y) + 1e-9)
238241

0 commit comments

Comments
 (0)