diff --git a/server/voice_changer/DiffusionSVC/inferencer/DiffusionSVCInferencer.py b/server/voice_changer/DiffusionSVC/inferencer/DiffusionSVCInferencer.py
index 557db7f2..eb80fae7 100644
--- a/server/voice_changer/DiffusionSVC/inferencer/DiffusionSVCInferencer.py
+++ b/server/voice_changer/DiffusionSVC/inferencer/DiffusionSVCInferencer.py
@@ -113,18 +113,18 @@ class DiffusionSVCInferencer(Inferencer):
         infer_speedup: int,
         silence_front: float,
     ) -> torch.Tensor:
-        with Timer("pre-process") as t:
+        with Timer("pre-process", False) as t:
             gt_spec = self.naive_model_call(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, spk_emb=None)
             # gt_spec = self.vocoder.extract(audio_t, 16000)
             # gt_spec = torch.cat((gt_spec, gt_spec[:, -1:, :]), 1)
         # print("[ ----Timer::1: ]", t.secs)

-        with Timer("pre-process") as t:
+        with Timer("pre-process", False) as t:
             out_mel = self.__call__(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, gt_spec=gt_spec, infer_speedup=infer_speedup, method='dpm-solver', k_step=k_step, use_tqdm=False, spk_emb=None)
         # print("[ ----Timer::2: ]", t.secs)

-        with Timer("pre-process") as t:  # NOQA
+        with Timer("pre-process", False) as t:  # NOQA
             if self.vocoder_onnx is None:
                 start_frame = int(silence_front * self.vocoder.vocoder_sample_rate / self.vocoder.vocoder_hop_size)
                 out_wav = self.mel2wav(out_mel, pitch, start_frame=start_frame)
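Note on the hunks above: every call site now passes `False` as `Timer`'s second positional argument, so the timing scaffolding stays in the code but is dormant at runtime. A minimal sketch of re-enabling one block for ad-hoc profiling; the `time.sleep` is a hypothetical stand-in for the model call, and `avrSecs` comes from the reworked `server/voice_changer/utils/Timer.py` at the end of this patch (assumes `server/` is on `sys.path`):

```python
import time

from voice_changer.utils.Timer import Timer

# Flip the second argument to True to measure just this block.
with Timer("pre-process", True) as t:
    time.sleep(0.01)  # hypothetical stand-in for naive_model_call / mel2wav
print(t.secs, t.avrSecs)  # last duration and rolling average of recent runs
```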
diff --git a/server/voice_changer/DiffusionSVC/pipeline/Pipeline.py b/server/voice_changer/DiffusionSVC/pipeline/Pipeline.py
index eb78d7fb..ed3ef8dc 100644
--- a/server/voice_changer/DiffusionSVC/pipeline/Pipeline.py
+++ b/server/voice_changer/DiffusionSVC/pipeline/Pipeline.py
@@ -81,23 +81,6 @@ class Pipeline(object):

     @torch.no_grad()
     def extract_volume_and_mask(self, audio: torch.Tensor, threshold: float):
-        '''
-        with Timer("[VolumeExt np]") as t:
-            for i in range(100):
-                volume = self.volumeExtractor.extract(audio)
-            time_np = t.secs
-        with Timer("[VolumeExt pt]") as t:
-            for i in range(100):
-                volume_t = self.volumeExtractor.extract_t(audio)
-            time_pt = t.secs
-
-        print("[Volume np]:", volume)
-        print("[Volume pt]:", volume_t)
-        print("[Perform]:", time_np, time_pt)
-        # -> [Perform]: 0.030178070068359375 0.005780220031738281 (RTX4090)
-        # -> [Perform]: 0.029046058654785156 0.0025115013122558594 (CPU i9 13900KF)
-        # ---> For this amount of work, Torch on the CPU may actually be faster?
-        '''
         volume_t = self.volumeExtractor.extract_t(audio)
         mask = self.volumeExtractor.get_mask_from_volume_t(volume_t, self.inferencer_block_size, threshold=threshold)
         volume = volume_t.unsqueeze(-1).unsqueeze(0)
@@ -119,7 +102,7 @@ class Pipeline(object):
                 protect=0.5
         ):
         # print("---------- pipe line --------------------")
-        with Timer("pre-process") as t:
+        with Timer("pre-process", False) as t:
             audio_t = torch.from_numpy(audio).float().unsqueeze(0).to(self.device)
             audio16k = self.resamplerIn(audio_t)
             volume, mask = self.extract_volume_and_mask(audio16k, threshold=-60.0)
@@ -127,7 +110,7 @@ class Pipeline(object):
             n_frames = int(audio16k.size(-1) // self.hop_size + 1)
         # print("[Timer::1: ]", t.secs)

-        with Timer("pre-process") as t:
+        with Timer("pre-process", False) as t:
             # pitch detection
             try:
                 # pitch = self.pitchExtractor.extract(
@@ -157,7 +140,7 @@ class Pipeline(object):
             feats = feats.view(1, -1)
         # print("[Timer::2: ]", t.secs)

-        with Timer("pre-process") as t:
+        with Timer("pre-process", False) as t:

             # embedding
             with autocast(enabled=self.isHalf):
@@ -175,7 +158,7 @@ class Pipeline(object):
             feats = F.interpolate(feats.permute(0, 2, 1), size=int(n_frames), mode='nearest').permute(0, 2, 1)
         # print("[Timer::3: ]", t.secs)

-        with Timer("pre-process") as t:
+        with Timer("pre-process", False) as t:
             # run inference
             try:
                 with torch.no_grad():
@@ -206,7 +189,7 @@ class Pipeline(object):
                     raise e
         # print("[Timer::4: ]", t.secs)

-        with Timer("pre-process") as t:  # NOQA
+        with Timer("pre-process", False) as t:  # NOQA
             feats_buffer = feats.squeeze(0).detach().cpu()
             if pitch is not None:
                 pitch_buffer = pitch.squeeze(0).detach().cpu()
diff --git a/server/voice_changer/RVC/RVCr2.py b/server/voice_changer/RVC/RVCr2.py
index 10e64335..419bd119 100644
--- a/server/voice_changer/RVC/RVCr2.py
+++ b/server/voice_changer/RVC/RVCr2.py
@@ -122,7 +122,7 @@ class RVCr2(VoiceChangerModel):
         if convertSize % 160 != 0:  # Compensate for truncation at the model's output hop size.
             convertSize = convertSize + (160 - (convertSize % 160))

-        outSize = convertSize - extra_frame
+        outSize = int(((convertSize - extra_frame) / 16000) * self.slotInfo.samplingRate)

         # If the buffer has not accumulated enough samples, pad with zeros
         if self.audio_buffer.shape[0] < convertSize:
@@ -193,6 +193,7 @@ class RVCr2(VoiceChangerModel):
         embOutputLayer = self.slotInfo.embOutputLayer
         useFinalProj = self.slotInfo.useFinalProj
+
         try:
             audio_out, self.pitchf_buffer, self.feature_buffer = self.pipeline.exec(
                 sid,
@@ -202,14 +203,16 @@ class RVCr2(VoiceChangerModel):
                 f0_up_key,
                 index_rate,
                 if_f0,
-                self.settings.extraConvertSize / self.slotInfo.samplingRate if self.settings.silenceFront else 0.,  # extra data size in seconds, computed at the RVC model's sampling rate (★1)
+                # 0,
+                self.settings.extraConvertSize / self.inputSampleRate if self.settings.silenceFront else 0.,  # extra data size in seconds, computed at the input sampling rate
                 embOutputLayer,
                 useFinalProj,
                 repeat,
-                protect
+                protect,
+                outSize
             )
-            outSize = outSize // 16000 * self.slotInfo.samplingRate
-            result = audio_out[-outSize:].detach().cpu().numpy() * np.sqrt(vol)
+            # result = audio_out[-outSize:].detach().cpu().numpy() * np.sqrt(vol)
+            result = audio_out.detach().cpu().numpy() * np.sqrt(vol)

             result = cast(
                 AudioInOut,
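Why the `outSize` change in `RVCr2.py` above matters: the old code kept `outSize` in 16 kHz samples and rescaled it after inference with `outSize // 16000 * samplingRate`, and that integer floor division collapses anything shorter than one second to zero, trimming the whole output away. Computing the target sample count up front and passing it into `pipeline.exec` avoids the truncation. A small illustrative check (the 40000 Hz model rate is an assumed example value):

```python
convert_size = 8000  # 0.5 s of audio at the 16 kHz processing rate
extra_frame = 0
model_sr = 40000     # assumed example value for slotInfo.samplingRate

old = (convert_size - extra_frame) // 16000 * model_sr        # 0: floor division wipes out the frame count
new = int(((convert_size - extra_frame) / 16000) * model_sr)  # 20000 samples, as intended
print(old, new)
```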
diff --git a/server/voice_changer/RVC/pipeline/Pipeline.py b/server/voice_changer/RVC/pipeline/Pipeline.py
index fac98260..f1f7cd0a 100644
--- a/server/voice_changer/RVC/pipeline/Pipeline.py
+++ b/server/voice_changer/RVC/pipeline/Pipeline.py
@@ -90,7 +90,10 @@ class Pipeline(object):
         protect=0.5,
         out_size=None,
     ):
-        with Timer("main-process") as t:
+        # print(f"pipeline exec input, audio:{audio.shape}, pitchf:{pitchf.shape}, feature:{feature.shape}")
+        # print(f"pipeline exec input, silence_front:{silence_front}, out_size:{out_size}")
+
+        with Timer("main-process", False) as t:  # NOQA
             # Input arrives at a 16000 Hz sampling rate; from here on everything runs at 16000.
             search_index = self.index is not None and self.big_npy is not None and index_rate != 0
             # self.t_pad = self.sr * repeat  # 1 second
@@ -241,6 +244,7 @@ class Pipeline(object):
                     raise e

             feats_buffer = feats.squeeze(0).detach().cpu()
+
             if pitchf is not None:
                 pitchf_buffer = pitchf.squeeze(0).detach().cpu()
             else:
@@ -258,6 +262,7 @@ class Pipeline(object):
             del sid
             # torch.cuda.empty_cache()

+        # print("EXEC AVERAGE:", t.avrSecs)
         return audio1, pitchf_buffer, feats_buffer

     def __del__(self):
diff --git a/server/voice_changer/VoiceChangerV2.py b/server/voice_changer/VoiceChangerV2.py
index ba151bbe..0623cb43 100644
--- a/server/voice_changer/VoiceChangerV2.py
+++ b/server/voice_changer/VoiceChangerV2.py
@@ -208,12 +208,13 @@ class VoiceChangerV2(VoiceChangerIF):
         block_frame = receivedData.shape[0]
         crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
         self._generate_strength(crossfade_frame)
-        # data = self.voiceChanger.generate_input(newData, block_frame, crossfade_frame, sola_search_frame)
+
         audio = self.voiceChanger.inference(
             receivedData,
             crossfade_frame=crossfade_frame,
             sola_search_frame=sola_search_frame
         )
+
         if hasattr(self, "sola_buffer") is True:
             np.set_printoptions(threshold=10000)
             audio_offset = -1 * (sola_search_frame + crossfade_frame + block_frame)
diff --git a/server/voice_changer/utils/Timer.py b/server/voice_changer/utils/Timer.py
index 43d7591f..d73169d4 100644
--- a/server/voice_changer/utils/Timer.py
+++ b/server/voice_changer/utils/Timer.py
@@ -1,15 +1,43 @@
 import time
+import inspect


 class Timer(object):
-    def __init__(self, title: str):
+    storedSecs = {}  # class-level store shared across instances
+
+    def __init__(self, title: str, enable: bool = True):
         self.title = title
+        self.enable = enable
+        self.secs = 0
+        self.msecs = 0
+        self.avrSecs = 0
+
+        if self.enable is False:
+            return
+
+        self.maxStores = 10
+
+        # Key the history on title plus caller file and line, so timers that
+        # share a title in different places keep separate statistics.
+        current_frame = inspect.currentframe()
+        caller_frame = inspect.getouterframes(current_frame, 2)
+        frame = caller_frame[1]
+        filename = frame.filename
+        line_number = frame.lineno
+        self.key = f"{title}_{filename}_{line_number}"
+        if self.key not in self.storedSecs:
+            self.storedSecs[self.key] = []

     def __enter__(self):
+        if self.enable is False:
+            return self
         self.start = time.time()
         return self

     def __exit__(self, *_):
+        if self.enable is False:
+            return
         self.end = time.time()
         self.secs = self.end - self.start
         self.msecs = self.secs * 1000  # millisecs
+        self.storedSecs[self.key].append(self.secs)
+        self.storedSecs[self.key] = self.storedSecs[self.key][-self.maxStores:]
+        self.avrSecs = sum(self.storedSecs[self.key]) / len(self.storedSecs[self.key])
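The reworked `Timer` keys its history on the title plus the caller's file and line (via `inspect`), so two blocks that share the title `"pre-process"` still accumulate separate rolling averages over the last `maxStores = 10` runs. A self-contained sketch of the same bookkeeping with illustrative names, for readers who want the idea without the class:

```python
import time
from collections import defaultdict, deque

# Each timing site keeps only its last 10 durations, mirroring storedSecs/maxStores.
history = defaultdict(lambda: deque(maxlen=10))

def record(site: str, secs: float) -> float:
    """Store one measurement and return the site's rolling average."""
    history[site].append(secs)
    return sum(history[site]) / len(history[site])

start = time.time()
time.sleep(0.01)  # stand-in for the timed section
print(record("pre-process", time.time() - start))
```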