From 816798e8b594e852a204f34ed60ea9321dc40826 Mon Sep 17 00:00:00 2001 From: nadare <1na2da0re3@gmail.com> Date: Sun, 9 Jul 2023 03:34:10 +0900 Subject: [PATCH] delete padding --- .../RVC/pitchExtractor/CrepeOnnxPitchExtractor.py | 2 -- .../voice_changer/RVC/pitchExtractor/CrepePitchExtractor.py | 4 ---- server/voice_changer/RVC/pitchExtractor/DioPitchExtractor.py | 3 +-- .../voice_changer/RVC/pitchExtractor/HarvestPitchExtractor.py | 2 -- 4 files changed, 1 insertion(+), 10 deletions(-) diff --git a/server/voice_changer/RVC/pitchExtractor/CrepeOnnxPitchExtractor.py b/server/voice_changer/RVC/pitchExtractor/CrepeOnnxPitchExtractor.py index fd126105..64b1ed42 100644 --- a/server/voice_changer/RVC/pitchExtractor/CrepeOnnxPitchExtractor.py +++ b/server/voice_changer/RVC/pitchExtractor/CrepeOnnxPitchExtractor.py @@ -54,8 +54,6 @@ class CrepeOnnxPitchExtractor(PitchExtractor): f0[pd < 0.1] = 0 f0 = f0.squeeze() - f0 = np.pad(f0, (start_frame, n_frames - f0.shape[0] - start_frame), 'constant', constant_values=(0, 0)) - f0 *= pow(2, f0_up_key / 12) pitchf[-f0.shape[0]:] = f0[:pitchf.shape[0]] f0bak = pitchf.copy() diff --git a/server/voice_changer/RVC/pitchExtractor/CrepePitchExtractor.py b/server/voice_changer/RVC/pitchExtractor/CrepePitchExtractor.py index 2883e215..55528a0d 100644 --- a/server/voice_changer/RVC/pitchExtractor/CrepePitchExtractor.py +++ b/server/voice_changer/RVC/pitchExtractor/CrepePitchExtractor.py @@ -47,10 +47,6 @@ class CrepePitchExtractor(PitchExtractor): f0[pd < 0.1] = 0 f0 = f0.squeeze() - f0 = torch.nn.functional.pad( - f0, (start_frame, n_frames - f0.shape[0] - start_frame) - ) - f0 *= pow(2, f0_up_key / 12) pitchf[-f0.shape[0]:] = f0.detach().cpu().numpy()[:pitchf.shape[0]] f0bak = pitchf.copy() diff --git a/server/voice_changer/RVC/pitchExtractor/DioPitchExtractor.py b/server/voice_changer/RVC/pitchExtractor/DioPitchExtractor.py index a0b0defb..a7d4c95b 100644 --- a/server/voice_changer/RVC/pitchExtractor/DioPitchExtractor.py +++ b/server/voice_changer/RVC/pitchExtractor/DioPitchExtractor.py @@ -17,7 +17,7 @@ class DioPitchExtractor(PitchExtractor): start_frame = int(silence_front * sr / window) real_silence_front = start_frame * window / sr - silence_front_offset = int(np.round(real_silence_front * sr)) + silence_front_offset = max(min(int(np.round(real_silence_front * sr)), len(audio) - 3000), 0) audio = audio[silence_front_offset:] f0_min = 50 @@ -34,7 +34,6 @@ class DioPitchExtractor(PitchExtractor): frame_period=10, ) f0 = pyworld.stonemask(audio.astype(np.double), _f0, t, sr) - # f0 = np.pad(f0.astype("float"), (start_frame, n_frames - len(f0) - start_frame)) f0 *= pow(2, f0_up_key / 12) pitchf[-f0.shape[0]:] = f0[:pitchf.shape[0]] diff --git a/server/voice_changer/RVC/pitchExtractor/HarvestPitchExtractor.py b/server/voice_changer/RVC/pitchExtractor/HarvestPitchExtractor.py index 469799f0..776c0c2c 100644 --- a/server/voice_changer/RVC/pitchExtractor/HarvestPitchExtractor.py +++ b/server/voice_changer/RVC/pitchExtractor/HarvestPitchExtractor.py @@ -35,8 +35,6 @@ class HarvestPitchExtractor(PitchExtractor): f0 = pyworld.stonemask(audio.astype(np.double), f0, t, sr) f0 = signal.medfilt(f0, 3) - # f0 = np.pad(f0.astype("float"), (start_frame, n_frames - len(f0) - start_frame)) - f0 *= pow(2, f0_up_key / 12) pitchf[-f0.shape[0]:] = f0[:pitchf.shape[0]] f0bak = pitchf.copy()