Mirror of https://github.com/w-okada/voice-changer.git (synced 2025-01-23 13:35:12 +03:00)
WIP: so-vits-svc 40v2, alpha (refactoring4 apply for v15)
This commit is contained in:
parent 7a2fd74d6c
commit 5818292046
client/demo_v15/dist/index.js (vendored, 4 lines changed)
File diff suppressed because one or more lines are too long
@@ -28,7 +28,7 @@ const App = () => {
     return (
         <div className="top-title">
             <span className="title">Voice Changer Setting</span>
-            <span className="top-title-version">for v.1.5.x</span>
+            <span className="top-title-version">for MMVC v.1.5.x</span>
             <span className="belongings">
                 <a className="link" href="https://github.com/w-okada/voice-changer" target="_blank" rel="noopener noreferrer">
                     <img src="./assets/icons/github.svg" />
@@ -132,6 +132,9 @@ class MMVCv15:

         return data

+    def get_processing_sampling_rate(self):
+        return self.hps.data.sampling_rate
+
     def _get_f0(self, detector: str, newData: any):

         audio_norm_np = newData.astype(np.float64)
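The new get_processing_sampling_rate accessor exposes the model-side rate (hps.data.sampling_rate) so the caller can resample incoming audio before conversion. A minimal, hedged sketch of that caller-side use; the stand-in model class, the 24000/48000 Hz rates, and the resampy call are illustrative assumptions, not part of this diff:

```python
import numpy as np
import resampy


class _FakeModel:
    """Stand-in for MMVCv15, only to make the sketch runnable."""
    def get_processing_sampling_rate(self):
        return 24000  # would be self.hps.data.sampling_rate in the real class


model = _FakeModel()
input_sample_rate = 48000
receivedData = np.zeros(input_sample_rate, dtype=np.float32)  # dummy 1-second buffer

processing_sampling_rate = model.get_processing_sampling_rate()
if input_sample_rate != processing_sampling_rate:
    # bring the received audio to the rate the model expects
    receivedData = resampy.resample(receivedData, input_sample_rate, processing_sampling_rate)
```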
@@ -146,7 +149,8 @@ class MMVCv15:

     def _get_spec(self, newData: any):
         audio = torch.FloatTensor(newData)
-        audio_norm = audio / self.hps.data.max_wav_value  # normalize
+        # audio_norm = audio / self.hps.data.max_wav_value  # normalize
+        audio_norm = audio
         audio_norm = audio_norm.unsqueeze(0)  # unsqueeze
         spec = spectrogram_torch(audio_norm, self.hps.data.filter_length,
                                  self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length,
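With this change _get_spec no longer divides by max_wav_value; generate_input (next hunk) performs the division once when it casts the incoming samples. A small sketch showing that the tensor reaching spectrogram_torch is the same either way; the sample values and the 32768.0 scale are illustrative assumptions:

```python
import numpy as np
import torch

max_wav_value = 32768.0  # typical value of hps.data.max_wav_value, assumed here
raw = np.array([0.0, 16384.0, -32768.0], dtype=np.float32)  # int16-scaled samples

# old flow: _get_spec normalized internally
old = torch.FloatTensor(raw) / max_wav_value

# new flow: generate_input normalizes once, _get_spec uses the data as-is
new = torch.FloatTensor(raw / max_wav_value)

assert torch.allclose(old, new)  # identical input to spectrogram_torch
```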
@@ -154,15 +158,21 @@ class MMVCv15:
         spec = torch.squeeze(spec, 0)
         return spec

-    def generate_input(self, newData: any, convertSize: int):
-        newData = newData.astype(np.float32)
+    def generate_input(self, newData: any, inputSize: int, crossfadeSize: int):
+        newData = newData.astype(np.float32) / self.hps.data.max_wav_value

         if hasattr(self, "audio_buffer"):
             self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with past data
         else:
             self.audio_buffer = newData

-        self.audio_buffer = self.audio_buffer[-(convertSize):]  # extract only the portion to be converted
+        convertSize = inputSize + crossfadeSize
+        if convertSize < 8192:
+            convertSize = 8192
+        if convertSize % self.hps.data.hop_length != 0:  # pad, because truncation occurs at the model output's hop size
+            convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
+
+        self.audio_buffer = self.audio_buffer[-1 * convertSize:]  # extract only the portion to be converted

         f0 = self._get_f0(self.settings.f0Detector, self.audio_buffer)  # generate f0
         spec = self._get_spec(self.audio_buffer)
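The sizing rule that generate_input now applies (a minimum window of 8192 samples, rounded up to a multiple of the hop length) can be isolated as a small helper. This is a sketch for illustration only; the function name and standalone form are not part of the diff:

```python
def compute_convert_size(input_size: int, crossfade_size: int, hop_length: int) -> int:
    convert_size = input_size + crossfade_size
    if convert_size < 8192:
        # enforce the minimum window the model is given
        convert_size = 8192
    if convert_size % hop_length != 0:
        # round up so the model's hop-sized output is not truncated
        convert_size += hop_length - (convert_size % hop_length)
    return convert_size


# 4096 input + 2048 crossfade = 6144 < 8192, so the floor applies;
# 8192 is already a multiple of a 256-sample hop, so nothing more is added.
print(compute_convert_size(4096, 2048, 256))  # 8192
```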
@@ -158,9 +158,8 @@ class VoiceChanger():
         newData = receivedData

         inputSize = newData.shape[0]
-        crossfadeSize = self.settings.crossFadeOverlapSize if self.settings.crossFadeOverlapSize > 0 else inputSize
+        crossfadeSize = min(self.settings.crossFadeOverlapSize, inputSize)

-        # convertSize = inputSize + min(self.settings.crossFadeOverlapSize, inputSize)
         print_convert_processing(
             f" Input data size: {receivedData.shape[0]}/{self.settings.inputSampleRate}hz {inputSize}/{processing_sampling_rate}hz")
         print_convert_processing(
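Taken together with the MMVCv15.generate_input change above, the caller now passes inputSize and crossfadeSize and lets the model class derive convertSize itself. A hedged sketch of the caller-side hand-off; only the two assignments are taken from the diff, the helper function and surrounding VoiceChanger wiring are assumptions:

```python
import numpy as np


def prepare_generate_input_args(newData: np.ndarray, crossFadeOverlapSize: int):
    inputSize = newData.shape[0]
    crossfadeSize = min(crossFadeOverlapSize, inputSize)
    # convertSize is no longer pre-computed here; MMVCv15.generate_input derives it
    # from inputSize + crossfadeSize with the 8192 floor and hop-length rounding.
    return inputSize, crossfadeSize


print(prepare_generate_input_args(np.zeros(4096, dtype=np.float32), 2048))  # (4096, 2048)
```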