WIP: so-vits-svc 40v2, alpha (refactoring4 apply for v13)

wataru 2023-03-13 02:48:56 +09:00
parent 5818292046
commit ef6fdd8d49
4 changed files with 17 additions and 11 deletions

File diff suppressed because one or more lines are too long


@@ -28,7 +28,7 @@ const App = () => {
     return (
         <div className="top-title">
             <span className="title">Voice Changer Setting</span>
-            <span className="top-title-version">for v.1.3.x</span>
+            <span className="top-title-version">for MMVC v.1.3.x</span>
             <span className="belongings">
                 <a className="link" href="https://github.com/w-okada/voice-changer" target="_blank" rel="noopener noreferrer">
                     <img src="./assets/icons/github.svg" />


@@ -118,6 +118,9 @@ class MMVCv13:
         return data

     def get_processing_sampling_rate(self):
         return self.hps.data.sampling_rate

+    def _get_spec(self, audio: any):
+        spec = spectrogram_torch(audio, self.hps.data.filter_length,
+                                 self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length,
@@ -125,19 +128,24 @@ class MMVCv13:
         spec = torch.squeeze(spec, 0)
         return spec

-    def generate_input(self, newData: any, convertSize: int):
-        newData = newData.astype(np.float32)
+    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int):
+        newData = newData.astype(np.float32) / self.hps.data.max_wav_value
         if hasattr(self, "audio_buffer"):
             self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with the past data
         else:
             self.audio_buffer = newData
-        self.audio_buffer = self.audio_buffer[-(convertSize):]  # extract only the portion to be converted
+        convertSize = inputSize + crossfadeSize
+        if convertSize < 8192:
+            convertSize = 8192
+        if convertSize % self.hps.data.hop_length != 0:  # the model's output is truncated at hop-size granularity, so pad to compensate
+            convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
+        self.audio_buffer = self.audio_buffer[-1 * convertSize:]  # extract only the portion to be converted
         audio = torch.FloatTensor(self.audio_buffer)
-        audio_norm = audio / self.hps.data.max_wav_value  # normalize
-        audio_norm = audio_norm.unsqueeze(0)  # unsqueeze
+        audio_norm = audio.unsqueeze(0)  # unsqueeze
         spec = self._get_spec(audio_norm)
         sid = torch.LongTensor([int(self.settings.srcId)])
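The rewritten generate_input derives the conversion window from the fresh input plus the crossfade overlap, floors it at 8192 samples, and rounds it up to a multiple of the STFT hop length so the model's hop-aligned output does not truncate the tail of the chunk. A minimal standalone sketch of that size calculation (the default hop_length here is an illustrative assumption; the real value comes from self.hps.data.hop_length):

def compute_convert_size(input_size: int, crossfade_size: int, hop_length: int = 128) -> int:
    # Window = fresh input plus the crossfade overlap with the previous chunk.
    convert_size = input_size + crossfade_size
    # Floor used by the commit, so tiny chunks still give the model enough context.
    if convert_size < 8192:
        convert_size = 8192
    # Round up to the next hop_length multiple; a partial hop would be dropped
    # by the STFT and the synthesized audio would come back short.
    if convert_size % hop_length != 0:
        convert_size += hop_length - (convert_size % hop_length)
    return convert_size

# e.g. compute_convert_size(4096, 4096) == 8192 (already a hop multiple),
#      compute_convert_size(6000, 3000) == 9088 (9000 rounded up to 71 * 128).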


@@ -149,9 +149,7 @@ class MMVCv15:
     def _get_spec(self, newData: any):
         audio = torch.FloatTensor(newData)
-        # audio_norm = audio / self.hps.data.max_wav_value  # normalize
-        audio_norm = audio
-        audio_norm = audio_norm.unsqueeze(0)  # unsqueeze
+        audio_norm = audio.unsqueeze(0)  # unsqueeze
         spec = spectrogram_torch(audio_norm, self.hps.data.filter_length,
                                  self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length,
                                  center=False)
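With normalization applied once to the raw samples in generate_input, _get_spec only adds the batch dimension before computing the spectrogram; the dead normalization lines are dropped. A sketch of the resulting contract (the 32768.0 scale is the usual 16-bit value and an assumption here; the real value is hps.data.max_wav_value, and spectrogram_torch is the repo's existing helper, its call left unchanged):

import numpy as np
import torch

MAX_WAV_VALUE = 32768.0  # assumed 16-bit scale; really hps.data.max_wav_value

def prepare_chunk(raw: np.ndarray) -> torch.Tensor:
    # Normalize once, on the raw int16 samples, as generate_input now does.
    normalized = raw.astype(np.float32) / MAX_WAV_VALUE
    # _get_spec then only reshapes: add the batch dimension, no rescaling.
    audio_norm = torch.FloatTensor(normalized).unsqueeze(0)  # shape (1, T)
    return audio_norm  # ready to pass to spectrogram_torch(...)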