Mirror of https://github.com/w-okada/voice-changer.git (synced 2025-01-23 13:35:12 +03:00)
WIP: so-vits-svc 40v2, alpha (apply refactoring4 to v15)
commit 5818292046
parent 7a2fd74d6c
client/demo_v15/dist/index.js (vendored): 4 changes
File diff suppressed because one or more lines are too long
@@ -28,7 +28,7 @@ const App = () => {
         return (
             <div className="top-title">
                 <span className="title">Voice Changer Setting</span>
-                <span className="top-title-version">for v.1.5.x</span>
+                <span className="top-title-version">for MMVC v.1.5.x</span>
                 <span className="belongings">
                     <a className="link" href="https://github.com/w-okada/voice-changer" target="_blank" rel="noopener noreferrer">
                         <img src="./assets/icons/github.svg" />
@@ -132,6 +132,9 @@ class MMVCv15:
 
         return data
 
+    def get_processing_sampling_rate(self):
+        return self.hps.data.sampling_rate
+
     def _get_f0(self, detector: str, newData: any):
 
         audio_norm_np = newData.astype(np.float64)
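The float64 cast in _get_f0 suggests a pyworld-based pitch detector (pyworld requires float64 input, and the project's f0Detector setting names match pyworld's algorithms). The following is a hedged sketch of how a detector string such as "dio" or "harvest" plausibly maps onto pyworld calls; the function name and structure are illustrative, not the repository's verbatim code.

    import numpy as np
    import pyworld  # assumption: f0Detector selects between pyworld's dio and harvest

    def get_f0_sketch(audio_norm_np: np.ndarray, detector: str, fs: int) -> np.ndarray:
        # pyworld requires float64 input, matching the astype(np.float64) above
        if detector == "dio":
            f0, t = pyworld.dio(audio_norm_np, fs)
        else:  # e.g. "harvest"
            f0, t = pyworld.harvest(audio_norm_np, fs)
        return pyworld.stonemask(audio_norm_np, f0, t, fs)  # refine the raw estimate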
@@ -146,7 +149,8 @@ class MMVCv15:
 
     def _get_spec(self, newData: any):
         audio = torch.FloatTensor(newData)
-        audio_norm = audio / self.hps.data.max_wav_value  # normalize
+        # audio_norm = audio / self.hps.data.max_wav_value  # normalize
+        audio_norm = audio
         audio_norm = audio_norm.unsqueeze(0)  # unsqueeze
         spec = spectrogram_torch(audio_norm, self.hps.data.filter_length,
                                  self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length,
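The commented-out division above pairs with the change to generate_input below: normalization by max_wav_value now happens once, on the raw samples, before _get_spec is called, so dividing again here would scale the signal down twice. A minimal sketch of the double-scaling bug the edit avoids, assuming the typical max_wav_value of 32768 for 16-bit audio:

    import numpy as np

    max_wav_value = 32768.0  # typical hps.data.max_wav_value for 16-bit audio
    raw = np.array([16384, -16384], dtype=np.float32)

    once = raw / max_wav_value    # [0.5, -0.5]: the expected [-1, 1] range
    twice = once / max_wav_value  # ~[1.5e-05, ...]: a near-silent input to the model
    print(once, twice)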
@@ -154,15 +158,21 @@ class MMVCv15:
         spec = torch.squeeze(spec, 0)
         return spec
 
-    def generate_input(self, newData: any, convertSize: int):
-        newData = newData.astype(np.float32)
+    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int):
+        newData = newData.astype(np.float32) / self.hps.data.max_wav_value
 
         if hasattr(self, "audio_buffer"):
             self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with past data
         else:
             self.audio_buffer = newData
 
-        self.audio_buffer = self.audio_buffer[-(convertSize):]  # extract only the portion to convert
+        convertSize = inputSize + crossfadeSize
+        if convertSize < 8192:
+            convertSize = 8192
+        if convertSize % self.hps.data.hop_length != 0:  # pad to the model's output hop size, which would otherwise truncate samples
+            convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
+
+        self.audio_buffer = self.audio_buffer[-1 * convertSize:]  # extract only the portion to convert
 
         f0 = self._get_f0(self.settings.f0Detector, self.audio_buffer)  # generate f0
         spec = self._get_spec(self.audio_buffer)
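The new sizing logic in generate_input can be read in isolation: the conversion window is the input block plus the crossfade overlap, floored at 8192 samples, then rounded up to a multiple of the model's hop length so no samples are lost at the frame boundary. A standalone sketch of that computation, with an assumed hop_length of 256:

    def compute_convert_size(inputSize: int, crossfadeSize: int, hop_length: int) -> int:
        convertSize = inputSize + crossfadeSize
        if convertSize < 8192:             # enforce a minimum window size
            convertSize = 8192
        if convertSize % hop_length != 0:  # round up to a whole number of hops
            convertSize += hop_length - (convertSize % hop_length)
        return convertSize

    assert compute_convert_size(4096, 1024, 256) == 8192  # floored to the minimum
    assert compute_convert_size(9000, 500, 256) == 9728   # 9500 rounded up to 256 * 38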
@@ -158,9 +158,8 @@ class VoiceChanger():
         newData = receivedData
 
         inputSize = newData.shape[0]
-        crossfadeSize = self.settings.crossFadeOverlapSize if self.settings.crossFadeOverlapSize > 0 else inputSize
-
-        convertSize = inputSize + min(self.settings.crossFadeOverlapSize, inputSize)
+        crossfadeSize = min(self.settings.crossFadeOverlapSize, inputSize)
+        # convertSize = inputSize + min(self.settings.crossFadeOverlapSize, inputSize)
         print_convert_processing(
             f" Input data size: {receivedData.shape[0]}/{self.settings.inputSampleRate}hz {inputSize}/{processing_sampling_rate}hz")
         print_convert_processing(
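On the VoiceChanger side, the fall-back-to-inputSize rule is replaced with a simple cap, and the convertSize computation moves into generate_input as shown above. A sketch contrasting the two crossfade behaviors (values are illustrative):

    def old_crossfade_size(overlap_setting: int, input_size: int) -> int:
        return overlap_setting if overlap_setting > 0 else input_size

    def new_crossfade_size(overlap_setting: int, input_size: int) -> int:
        return min(overlap_setting, input_size)

    print(old_crossfade_size(16384, 4096))  # 16384: overlap could exceed the block itself
    print(new_crossfade_size(16384, 4096))  # 4096: overlap is capped at the block size
    print(new_crossfade_size(0, 4096))      # 0: a zero setting now means no overlap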
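The print statement contrasts settings.inputSampleRate with processing_sampling_rate, which is what the new get_processing_sampling_rate() accessor exposes: the caller receives audio at the client's rate and must resample it to the model's native rate before conversion. A hedged sketch of that step, using scipy as an illustrative stand-in for whatever resampler the project actually uses:

    import numpy as np
    from scipy import signal  # assumption: scipy is illustrative, not the project's resampler

    def to_model_rate(received: np.ndarray, input_rate: int, model_rate: int) -> np.ndarray:
        if input_rate == model_rate:
            return received
        target_len = received.shape[0] * model_rate // input_rate
        return signal.resample(received, target_len)  # resample to the model's native rate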