Skip to content

Commit c52b5c2

Browse files
committed
add onnx hifigan test
1 parent 6090ecb commit c52b5c2

2 files changed

Lines changed: 134 additions & 0 deletions

File tree

onnx_export_hifigan.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
# coding=utf8

import os
import sys
import inference.svs.ds_e2e as e2e
from utils.audio import save_wav
from utils.hparams import set_hparams, hparams

import torch

# Project root: the directory containing this script.
root_dir = os.path.dirname(os.path.abspath(__file__))
# BUG FIX: the original wrote f'"{root_dir}"', embedding literal double
# quotes into the variable. Environment values are not shell-parsed, so the
# quotes would corrupt the path for any child process that inherits
# PYTHONPATH. Assign the bare path instead.
os.environ['PYTHONPATH'] = root_dir

# Fake the CLI arguments that set_hparams() parses, so this script can be
# run without passing --config / --exp_name on the command line.
sys.argv = [
    f'{root_dir}/inference/svs/ds_e2e.py',
    '--config',
    f'{root_dir}/usr/configs/midi/e2e/opencpop/ds100_adj_rel.yaml',
    '--exp_name',
    '0228_opencpop_ds100_rel'
]
22+
if __name__ == '__main__':

    # Parse hparams from the fake sys.argv set up above, then build the
    # end-to-end DiffSinger inferencer, which owns the HiFi-GAN vocoder.
    set_hparams(print_hparams=False)
    infer_ins = e2e.DiffSingerE2EInfer(hparams)

    # Move the vocoder to CPU so it matches the CPU dummy inputs below.
    infer_ins.vocoder.to('cpu')
    with torch.no_grad():
        # Dummy tracing inputs: mel spectrogram [1, 80, 100] and an f0
        # contour [1, 100] — presumably [batch, mel_bins, frames] and
        # [batch, frames]; TODO confirm against the vocoder's forward().
        x = torch.rand(1, 80, 100)
        f0 = torch.rand(1, 100)

        # Trace the vocoder into an ONNX graph next to the script.
        torch.onnx.export(
            infer_ins.vocoder,
            (
                x,
                f0
            ),
            "hifigan.onnx",
            input_names=["x", "f0"],
            output_names=["y"],
            # NOTE(review): these dynamic-axis labels look mislabelled —
            # x is built as [1, 80, 100], so "hop_size"/"win_size"/
            # "fft_size" do not describe its axes, and y axis 2 being
            # "batch_size" is dubious. The labels are graph metadata only
            # (they do not affect numerics), but readers of the exported
            # model should not trust them.
            dynamic_axes={
                "x": {
                    0: "hop_size",
                    1: "win_size",
                    2: "fft_size",
                },
                "f0": {
                    0: "len",
                    1: "frames"
                },
                "y": {
                    0: "len",
                    1: "frames",
                    2: "batch_size"
                }
            },
            opset_version=11
        )

    print("OK")

onnx_test_hifigan.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
# coding=utf8

import os
import sys
import inference.svs.ds_e2e as e2e
from utils.audio import save_wav
from utils.hparams import set_hparams, hparams

import torch
import onnxruntime as ort

# Project root: the directory containing this script.
root_dir = os.path.dirname(os.path.abspath(__file__))
# BUG FIX: the original wrote f'"{root_dir}"', embedding literal double
# quotes into the variable. Environment values are not shell-parsed, so the
# quotes would corrupt the path for any child process that inherits
# PYTHONPATH. Assign the bare path instead.
os.environ['PYTHONPATH'] = root_dir

# Fake the CLI arguments that set_hparams() parses, so this script can be
# run without passing --config / --exp_name on the command line.
sys.argv = [
    f'{root_dir}/inference/svs/ds_e2e.py',
    '--config',
    f'{root_dir}/usr/configs/midi/e2e/opencpop/ds100_adj_rel.yaml',
    '--exp_name',
    '0228_opencpop_ds100_rel'
]
24+
def to_numpy(tensor):
    """Convert a torch tensor to a numpy array.

    ``Tensor.detach()`` is a no-op for tensors that do not require grad,
    so the original ``requires_grad`` branch was redundant — one
    expression safely handles both cases (and CPU/CUDA tensors alike).
    """
    return tensor.detach().cpu().numpy()
26+
27+
28+
class TestHifiganInfer(e2e.DiffSingerE2EInfer):
    """DiffSinger E2E inferencer that replaces the torch HiFi-GAN vocoder
    call with the exported ONNX model, so both back ends can be compared
    on identical inputs.
    """

    def __init__(self, hparams, device=None):
        super().__init__(hparams, device)
        # Second vocoder: the graph produced by onnx_export_hifigan.py,
        # expected next to the working directory.
        self.vocoder2 = ort.InferenceSession("hifigan.onnx")

    def run_vocoder(self, c, **kwargs):
        """Synthesize a waveform from a mel spectrogram via onnxruntime.

        c: mel tensor, transposed here to [B, 80, T] for the ONNX input.
        kwargs may carry 'f0' ([B, T]); it is fed to the model only when
        'use_nsf' is enabled in hparams.
        Returns the waveform as a tensor with a leading batch axis.
        """
        c = c.transpose(2, 1)  # [B, 80, T]
        f0 = kwargs.get('f0')  # [B, T]

        if f0 is not None and hparams.get('use_nsf'):
            ort_inputs = {
                'x': to_numpy(c),
                'f0': to_numpy(f0)
            }
        else:
            # BUG FIX: the exported graph always declares an 'f0' input and
            # onnxruntime only accepts ndarray feeds — the original passed a
            # bare dict ({}) here, which raises inside run(). Feed a zero
            # contour of matching frame length instead.
            # NOTE(review): confirm an all-zero f0 is the right "NSF
            # disabled" signal for this vocoder.
            ort_inputs = {
                'x': to_numpy(c),
                'f0': to_numpy(torch.zeros(c.shape[0], c.shape[2]))
            }

        # run(None, ...) fetches all declared outputs; [0] is "y".
        ort_out = self.vocoder2.run(None, ort_inputs)
        y = torch.from_numpy(ort_out[0]).to(self.device)  # [T]

        return y[None]
54+
55+
56+
if __name__ == '__main__':
    # User input: Chinese lyric characters with aligned notes and
    # per-note durations (the 'word' input type).
    demo_input = {
        'text': '小酒窝长睫毛AP是你最美的记号',
        'notes': 'C#4/Db4 | F#4/Gb4 | G#4/Ab4 | A#4/Bb4 F#4/Gb4 | F#4/Gb4 C#4/Db4 | C#4/Db4 | rest | C#4/Db4 | A#4/Bb4 | G#4/Ab4 | A#4/Bb4 | G#4/Ab4 | F4 | C#4/Db4',
        'notes_duration': '0.407140 | 0.376190 | 0.242180 | 0.509550 0.183420 | 0.315400 0.235020 | 0.361660 | 0.223070 | 0.377270 | 0.340550 | 0.299620 | 0.344510 | 0.283770 | 0.323390 | 0.360340',
        'input_type': 'word'
    }

    wav_path = "./infer_out/onnx_test_hifigan_res.wav"

    # Build the ONNX-backed inferencer and synthesize the demo phrase.
    set_hparams(print_hparams=False)
    runner = TestHifiganInfer(hparams)
    audio = runner.infer_once(demo_input)

    # Ensure the output directory exists before writing the wav.
    os.makedirs(os.path.dirname(wav_path), exist_ok=True)
    print(f'| save audio: {wav_path}')
    save_wav(audio, wav_path, hparams['audio_sample_rate'])

    print("OK")

0 commit comments

Comments
 (0)