Mirror of https://github.com/w-okada/voice-changer.git (synced 2025-01-23 13:35:12 +03:00)
bugfix WIP: server device mode
commit b559582dc4 (parent 371e1b8cac)
@@ -42,7 +42,7 @@ class DiffusionSVC(VoiceChangerModel):
         # Other settings
         self.settings.tran = self.slotInfo.defaultTune
         self.settings.dstId = self.slotInfo.dstId
-        self.settings.kstep = self.slotInfo.defaultKstep
+        self.settings.kStep = self.slotInfo.defaultKstep

         print("[Voice Changer] [DiffusionSVC] Initializing... done")
@@ -86,8 +86,8 @@ class DiffusionSVC(VoiceChangerModel):
         solaSearchFrame: int = 0,
     ):
         newData = newData.astype(np.float32) / 32768.0  # Arrives at the DiffusionSVC model's sampling rate. (extraDataLength, Crossfade, etc. are processed at the same SR) (★1)

-        new_feature_length = newData.shape[0] * 100 // self.slotInfo.samplingRate  # 100 comes from hubert's hop size (16000 / 160)
+        new_feature_length = int(((newData.shape[0] / self.inputSampleRate) * self.slotInfo.samplingRate) / 512)  # 100 comes from hubert's hop size (16000 / 160).
         # ↑ newData.shape[0] // sampleRate gives the data length in seconds; multiplying by 16000 gives the length in hubert's domain; dividing by the hop size (160) gives the feats data size.
         if self.audio_buffer is not None:
             # Concatenate with past data
             self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)
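Editor's note (sketch, not part of the diff): the two formulas above differ in which rate they trust. The old expression hardcodes 100 feature frames per second of model-rate audio (hubert: 16000 Hz / hop 160); the new one converts the input length to seconds via self.inputSampleRate, rescales to the model's rate, and divides by a 512-sample hop. A minimal standalone sketch, with assumed rates in place of the self.* fields:

def old_feature_length(num_samples: int, model_sr: int) -> int:
    # 100 frames per second: hubert consumes 16000 Hz audio with hop 160,
    # and 16000 / 160 = 100.
    return num_samples * 100 // model_sr

def new_feature_length(num_samples: int, input_sr: int, model_sr: int) -> int:
    # Length in seconds, rescaled to the model's rate, divided by a
    # 512-sample hop.
    return int(((num_samples / input_sr) * model_sr) / 512)

# One second of 48 kHz input against a 44.1 kHz model (assumed rates):
print(old_feature_length(48000, 44100))         # 108 (treats input as model-rate)
print(new_feature_length(48000, 48000, 44100))  # 86  (one second -> 44100 / 512 frames)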
@@ -104,13 +104,14 @@ class DiffusionSVC(VoiceChangerModel):
             convertSize = convertSize + (128 - (convertSize % 128))

         # If the buffer has not accumulated enough, pad with zeros
+        generateFeatureLength = int(((convertSize / self.inputSampleRate) * self.slotInfo.samplingRate) / 512) + 1
         if self.audio_buffer.shape[0] < convertSize:
             self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
-            self.pitchf_buffer = np.concatenate([np.zeros([convertSize * 100 // self.slotInfo.samplingRate]), self.pitchf_buffer])
-            self.feature_buffer = np.concatenate([np.zeros([convertSize * 100 // self.slotInfo.samplingRate, self.slotInfo.embChannels]), self.feature_buffer])
+            self.pitchf_buffer = np.concatenate([np.zeros(generateFeatureLength), self.pitchf_buffer])
+            self.feature_buffer = np.concatenate([np.zeros([generateFeatureLength, self.slotInfo.embChannels]), self.feature_buffer])

         convertOffset = -1 * convertSize
-        featureOffset = -convertSize * 100 // self.slotInfo.samplingRate
+        featureOffset = -1 * generateFeatureLength
         self.audio_buffer = self.audio_buffer[convertOffset:]  # Extract only the portion to be converted
         self.pitchf_buffer = self.pitchf_buffer[featureOffset:]
         self.feature_buffer = self.feature_buffer[featureOffset:]
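Editor's note (sketch, not part of the diff): generateFeatureLength is computed up front because the zero padding prepended to audio_buffer must be matched by a frame-count padding on pitchf_buffer and feature_buffer, or the trailing slices at the end of the hunk would misalign. A self-contained sketch of the step, with assumed sizes standing in for convertSize, self.inputSampleRate, slotInfo.samplingRate, and embChannels:

import numpy as np

convert_size = 4096
input_sr = 48000
model_sr = 44100
emb_channels = 768

audio_buffer = np.zeros(1000, dtype=np.float32)   # not enough samples yet
pitchf_buffer = np.zeros(10, dtype=np.float32)
feature_buffer = np.zeros([10, emb_channels], dtype=np.float32)

# One feature frame per 512 model-rate samples; +1 guards against rounding short.
generate_feature_length = int(((convert_size / input_sr) * model_sr) / 512) + 1

if audio_buffer.shape[0] < convert_size:
    # Prepend zeros, then keep only the trailing window (as the diff does),
    # padding audio and features by matching amounts so they stay aligned.
    audio_buffer = np.concatenate([np.zeros([convert_size]), audio_buffer])
    pitchf_buffer = np.concatenate([np.zeros(generate_feature_length), pitchf_buffer])
    feature_buffer = np.concatenate([np.zeros([generate_feature_length, emb_channels]), feature_buffer])

audio_buffer = audio_buffer[-convert_size:]
pitchf_buffer = pitchf_buffer[-generate_feature_length:]
feature_buffer = feature_buffer[-generate_feature_length:]
print(audio_buffer.shape, pitchf_buffer.shape, feature_buffer.shape)  # (4096,) (8,) (8, 768)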
@@ -143,8 +143,6 @@ class Pipeline(object):
                 f0_up_key,
                 silence_front=silence_front,
             )
-            # def extract(self, audio: AudioInOut, sr: int, block_size: int, model_sr: int, pitch, f0_up_key, silence_front=0):
-
             pitch = torch.tensor(pitch[-n_frames:], device=self.device).unsqueeze(0).long()
         except IndexError as e:  # NOQA
             raise NotEnoughDataExtimateF0()
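Editor's note (sketch, not part of the diff): the surrounding Pipeline code trims the estimated pitch to the last n_frames and converts index trouble on a too-short sequence into the pipeline's own error type (NotEnoughDataExtimateF0, spelled as in the source). A hedged, standalone sketch of that pattern:

import torch

class NotEnoughDataExtimateF0(Exception):
    # Name spelled as in the source.
    pass

def trim_pitch(pitch, n_frames: int, device: str = "cpu") -> torch.Tensor:
    # Keep the most recent n_frames and shape the result (1, n_frames) as
    # the model consumes it; an IndexError is surfaced as the pipeline's
    # domain error, mirroring the diff.
    try:
        return torch.tensor(pitch[-n_frames:], device=device).unsqueeze(0).long()
    except IndexError:
        raise NotEnoughDataExtimateF0()

print(trim_pitch([100.0, 110.0, 120.0, 130.0], n_frames=2))  # tensor([[120, 130]])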
@@ -101,7 +101,7 @@ class RVC(VoiceChangerModel):
         solaSearchFrame: int = 0,
     ):
         newData = newData.astype(np.float32) / 32768.0  # Arrives at the RVC model's sampling rate. (extraDataLength, Crossfade, etc. are processed at the same SR) (★1)

         # ↑ newData.shape[0] // sampleRate gives the data length in seconds; multiplying by 16000 gives the length in hubert's domain; dividing by the hop size (160) gives the feats data size.
         new_feature_length = newData.shape[0] * 100 // self.slotInfo.samplingRate
         if self.audio_buffer is not None:
             # Concatenate with past data
@@ -84,6 +84,12 @@ class VoiceChanger(VoiceChangerIF):
     def setModel(self, model: Any):
         self.voiceChanger = model

+    def setInputSampleRate(self, sr: int):
+        self.settings.inputSampleRate = sr
+
+    def setOutputSampleRate(self, sr: int):
+        self.settings.outputSampleRate = sr
+
     def get_info(self):
         data = asdict(self.settings)
         if self.voiceChanger is not None:
@@ -53,10 +53,10 @@ class VoiceChangerManager(ServerDeviceCallbacks):
         return self.voiceChanger.get_processing_sampling_rate()

     def setInputSamplingRate(self, sr: int):
-        self.voiceChanger.settings.inputSampleRate = sr
+        self.voiceChanger.setInputSampleRate(sr)

     def setOutputSamplingRate(self, sr: int):
-        self.voiceChanger.settings.outputSampleRate = sr
+        self.voiceChanger.setOutputSampleRate(sr)

     ############################
     # VoiceChangerManager
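Editor's note (sketch, not part of the diff): the likely point of this hunk, given the commit message and the hunks around it, is that assigning to voiceChanger.settings directly mutates a field without letting the implementation react, whereas VoiceChangerV2 (next hunk) must push new rates into the wrapped model. A toy sketch of the difference, with illustrative class names:

class InnerModel:
    def setSamplingRate(self, in_sr: int, out_sr: int):
        print(f"model reconfigured: {in_sr} -> {out_sr}")

class ChangerSketch:
    def __init__(self):
        self.inputSampleRate = 48000
        self.outputSampleRate = 48000
        self.model = InnerModel()

    def setInputSampleRate(self, sr: int):
        self.inputSampleRate = sr
        self.model.setSamplingRate(self.inputSampleRate, self.outputSampleRate)

vc = ChangerSketch()
vc.inputSampleRate = 44100    # before this commit: field flips, model never told
vc.setInputSampleRate(44100)  # after: the wrapped model is reconfigured too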
@@ -95,6 +95,14 @@ class VoiceChangerV2(VoiceChangerIF):
         self.voiceChanger = model
         self.voiceChanger.setSamplingRate(self.settings.inputSampleRate, self.settings.outputSampleRate)

+    def setInputSampleRate(self, sr: int):
+        self.settings.inputSampleRate = sr
+        self.voiceChanger.setSamplingRate(self.settings.inputSampleRate, self.settings.outputSampleRate)
+
+    def setOutputSampleRate(self, sr: int):
+        self.settings.outputSampleRate = sr
+        self.voiceChanger.setSamplingRate(self.settings.inputSampleRate, self.settings.outputSampleRate)
+
     def get_info(self):
         data = asdict(self.settings)
         if self.voiceChanger is not None:
@@ -25,3 +25,9 @@ class VoiceChangerIF(Protocol):

     def export2onnx() -> Any:
         ...
+
+    def setInputSampleRate(self, sr: int):
+        ...
+
+    def setOutputSampleRate(self, sr: int):
+        ...
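Editor's note (sketch, not part of the diff): declaring the two setters on the typing.Protocol makes them part of the structural interface, so VoiceChangerManager can call setInputSampleRate/setOutputSampleRate on whichever implementation it holds without caring which class is behind it. A minimal, self-contained sketch with toy names (not from the repo):

from typing import Protocol

class RateConfigurable(Protocol):   # toy stand-in for VoiceChangerIF
    def setInputSampleRate(self, sr: int): ...
    def setOutputSampleRate(self, sr: int): ...

class ToyChanger:
    # No explicit inheritance needed: structural typing makes this a
    # valid RateConfigurable because it provides both methods.
    def setInputSampleRate(self, sr: int):
        print("input ->", sr)
    def setOutputSampleRate(self, sr: int):
        print("output ->", sr)

def reconfigure(vc: RateConfigurable, in_sr: int, out_sr: int):
    vc.setInputSampleRate(in_sr)
    vc.setOutputSampleRate(out_sr)

reconfigure(ToyChanger(), 48000, 44100)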