Mirror of https://github.com/w-okada/voice-changer.git
WIP: support so-vits-svc, message improvement
parent 010eb395ef
commit d7cff0709f
@@ -141,7 +141,8 @@ class SoVitsSvc40v2:
         wav_44k = audio_buffer
         # f0 = utils.compute_f0_parselmouth(wav, sampling_rate=self.target_sample, hop_length=self.hop_size)
         f0 = utils.compute_f0_dio(wav_44k, sampling_rate=self.hps.data.sampling_rate, hop_length=self.hps.data.hop_length)
-        print(f"--- >>>>> ---- >>>> {wav_44k.shape[0] / self.hps.data.hop_length}")
+        if wav_44k.shape[0] % self.hps.data.hop_length != 0:
+            print(f" !!! !!! !!! wav size not multiple of hopsize: {wav_44k.shape[0] / self.hps.data.hop_length}")

         f0, uv = utils.interpolate_f0(f0)
         f0 = torch.FloatTensor(f0)
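Note: the guard added here replaces a bare debug print. compute_f0_dio emits one f0 value per hop, so the buffer length is expected to be an exact multiple of hop_length. A minimal sketch of the check with hypothetical sizes (only numpy assumed):

import numpy as np

hop_length = 512                              # hypothetical hop size (stands in for hps.data.hop_length)
wav_44k = np.zeros(24000, dtype=np.float32)   # hypothetical audio buffer

n_frames, remainder = divmod(wav_44k.shape[0], hop_length)
if remainder != 0:
    # same condition as the added warning: a fractional frame count
    # means the last analysis frame is not backed by a full hop
    print(f"wav size not multiple of hopsize: {wav_44k.shape[0] / hop_length}")
else:
    print(f"{n_frames} f0 frames expected")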
@@ -147,7 +147,7 @@ class VoiceChanger():
     # receivedData: tuple of short
     def on_request(self, receivedData: any):
         processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
-        print(f"------------ Convert processing.... ------------")
+        print_convert_processing(f"------------ Convert processing.... ------------")
         # 前処理 (preprocessing)
         with Timer("pre-process") as t:

@@ -158,7 +158,8 @@ class VoiceChanger():

         inputSize = newData.shape[0]
         convertSize = inputSize + min(self.settings.crossFadeOverlapSize, inputSize)
-        print(f"  Input data size of {receivedData.shape[0]}/{self.settings.inputSampleRate}hz {inputSize}/{processing_sampling_rate}hz")
+        print_convert_processing(
+            f"  Input data size of {receivedData.shape[0]}/{self.settings.inputSampleRate}hz {inputSize}/{processing_sampling_rate}hz")

         if convertSize < 8192:
             convertSize = 8192
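Note: convertSize is the fresh input plus the cross-fade overlap, floored at 8192 samples so the model never receives a very short buffer. A hedged sketch of the sizing rule (names follow the diff, values hypothetical):

inputSize = 4096                 # hypothetical chunk size from the client
crossFadeOverlapSize = 1024      # hypothetical setting

convertSize = inputSize + min(crossFadeOverlapSize, inputSize)
if convertSize < 8192:           # enforce the minimum conversion window
    convertSize = 8192
print(convertSize)               # -> 8192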
@@ -170,8 +171,9 @@ class VoiceChanger():
         overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
         cropRange = (-1 * (inputSize + overlapSize), -1 * overlapSize)

-        print(f"  Convert input data size of {convertSize}")
-        print(f"  overlap:{overlapSize}, cropRange:{cropRange}")
+        print_convert_processing(f"  Convert input data size of {convertSize}")
+        print_convert_processing(f"  overlap:{overlapSize}, cropRange:{cropRange}")
+
         self._generate_strength(inputSize)
         data = self.voiceChanger.generate_input(newData, convertSize, cropRange)
         preprocess_time = t.secs
@@ -190,16 +192,18 @@ class VoiceChanger():
                 cur_overlap = audio[cur_overlap_start:cur_overlap_end]
                 # cur_overlap = audio[-1 * (inputSize + overlapSize):-1 * inputSize]
                 powered_prev = prev_overlap * self.np_prev_strength
-                print(f" ---- ---- ---- audio:{audio.shape}, cur_overlap:{cur_overlap.shape}, self.np_cur_strength:{self.np_cur_strength.shape}")
-                print(f" ---- ---- ---------------- {cur_overlap_start}, {cur_overlap_end}")
+                print_convert_processing(
+                    f"  audio:{audio.shape}, cur_overlap:{cur_overlap.shape}, self.np_cur_strength:{self.np_cur_strength.shape}")
+                print_convert_processing(f"  cur_overlap_start:{cur_overlap_start}, cur_overlap_end:{cur_overlap_end}")
                 powered_cur = cur_overlap * self.np_cur_strength
                 powered_result = powered_prev + powered_cur

                 cur = audio[-1 * inputSize:-1 * overlapSize]
                 result = np.concatenate([powered_result, cur], axis=0)
-                print(f"  overlap:{overlapSize}, current:{cur.shape[0]}, result:{result.shape[0]}... result should be same as input")
-                # print(prev_overlap.shape, self.np_prev_strength.shape, cur_overlap.shape, self.np_cur_strength.shape)
-                # print(">>>>>>>>>>>", -1 * (inputSize + overlapSize), -1 * inputSize, self.np_prev_audio1.shape, overlapSize)
+                print_convert_processing(
+                    f"  overlap:{overlapSize}, current:{cur.shape[0]}, result:{result.shape[0]}... result should be same as input")
+                if cur.shape[0] != result.shape[0]:
+                    print_convert_processing(f"  current and result should be same as input")

             else:
                 result = np.zeros(4096).astype(np.int16)
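Note: the branch above cross-fades the previous chunk's tail into the current chunk's head using precomputed strength envelopes, then appends the rest of the current chunk. A minimal numpy sketch of that hand-off, with linear ramps standing in for np_prev_strength / np_cur_strength (all sizes hypothetical):

import numpy as np

overlapSize = 4
prev_overlap = np.ones(overlapSize)                     # tail of the previous output
cur_overlap = np.full(overlapSize, 0.5)                 # head of the current output
np_prev_strength = np.linspace(1.0, 0.0, overlapSize)   # fade out
np_cur_strength = np.linspace(0.0, 1.0, overlapSize)    # fade in

powered_prev = prev_overlap * np_prev_strength
powered_cur = cur_overlap * np_cur_strength
powered_result = powered_prev + powered_cur             # smooth seam between chunks
print(powered_result)                                   # -> [1.  0.8333...  0.6666...  0.5]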
@@ -221,7 +225,8 @@ class VoiceChanger():
             else:
                 outputData = result

-            print(f"  Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
+            print_convert_processing(
+                f"  Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")

             if self.settings.recordIO == 1:
                 self.ioRecorder.writeInput(receivedData)
@@ -229,16 +234,26 @@ class VoiceChanger():

             if receivedData.shape[0] != outputData.shape[0]:
                 outputData = pad_array(outputData, receivedData.shape[0])
-                print(
+                print_convert_processing(
                     f"  Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")

         postprocess_time = t.secs

-        print(" [fin] Input/Output size:", receivedData.shape[0], outputData.shape[0])
+        print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")
         perf = [preprocess_time, mainprocess_time, postprocess_time]
         return outputData, perf


+##############
+PRINT_CONVERT_PROCESSING = False
+# PRINT_CONVERT_PROCESSING = True
+
+
+def print_convert_processing(mess: str):
+    if PRINT_CONVERT_PROCESSING == True:
+        print(mess)
+
+
 def pad_array(arr, target_length):
     current_length = arr.shape[0]
     if current_length >= target_length:
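Note: the new module-level switch routes all per-request traces through one gate, so verbose logging can be toggled without touching call sites. A small usage sketch (same behavior; `if PRINT_CONVERT_PROCESSING:` would be the more idiomatic test than the diff's `== True`):

PRINT_CONVERT_PROCESSING = False   # flip to True to trace the conversion pipeline

def print_convert_processing(mess: str):
    if PRINT_CONVERT_PROCESSING:   # idiomatic form of the diff's `== True` check
        print(mess)

print_convert_processing("suppressed unless the flag is set")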
@@ -250,8 +265,6 @@ def pad_array(arr, target_length):
     padded_arr = np.pad(arr, (pad_left, pad_right), 'constant', constant_values=(0, 0))
     return padded_arr

-##############
-

 class Timer(object):
     def __init__(self, title: str):
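Note: pad_array, whose middle lines fall outside the hunks above, centers a short buffer inside zeros of the target length. A self-contained sketch; the pad_left/pad_right split is an assumption consistent with the visible np.pad call:

import numpy as np

def pad_array(arr, target_length):
    current_length = arr.shape[0]
    if current_length >= target_length:
        return arr
    pad = target_length - current_length
    pad_left = pad // 2              # assumed symmetric split
    pad_right = pad - pad_left
    return np.pad(arr, (pad_left, pad_right), 'constant', constant_values=(0, 0))

print(pad_array(np.array([1, 2, 3], dtype=np.int16), 7))   # -> [0 0 1 2 3 0 0]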