voice-changer/demo/mods/Whisper.py
2022-11-03 04:05:42 +09:00

37 lines
1.1 KiB
Python
Executable File

import whisper
import numpy as np
import torchaudio
from scipy.io.wavfile import write
# Checkpoint files are expected under /whisper, one "<size>.pt" per model size.
_MODEL_SIZES = ("tiny", "base", "small", "medium")

# Maps a model size name to the path of its Whisper checkpoint file.
_MODELS = {size: f"/whisper/{size}.pt" for size in _MODEL_SIZES}
class Whisper():
    """Buffer incoming audio samples and transcribe them with a Whisper model.

    Typical use: call ``loadModel`` once, feed chunks with ``addData``, then
    call ``transcribe`` to obtain the text for everything buffered so far.
    """

    # Sample rate written into the WAV header of the buffered audio.
    SAMPLE_RATE = 24000
    # Scratch file the buffered audio is dumped to before transcription.
    RECEIVED_DATA_FILE = "received_data.wav"

    def __init__(self):
        self.storedSizeFromTry = 0
        # Set by loadModel(); None means no model has been loaded yet.
        self.model = None
        # Initialise the buffer here so addData() works before loadModel().
        self.data = self._emptyBuffer()

    @staticmethod
    def _emptyBuffer():
        """Return the initial sample buffer: a single zero sample (float64)."""
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        return np.zeros(1, dtype=np.float64)

    def loadModel(self, model, device=None):
        """Load a Whisper checkpoint and reset the sample buffer.

        Args:
            model: Key of ``_MODELS`` ("tiny", "base", "small" or "medium").
            device: Optional device forwarded to ``whisper.load_model``
                (e.g. ``"cpu"``). ``None`` lets Whisper choose.

        Raises:
            KeyError: If ``model`` is not a key of ``_MODELS``.
        """
        self.model = whisper.load_model(_MODELS[model], device=device)
        self.data = self._emptyBuffer()

    def addData(self, unpackedData):
        """Append a 1-D chunk of samples to the internal buffer.

        Args:
            unpackedData: Sequence or array of samples, concatenated along
                axis 0 onto the existing buffer.
        """
        self.data = np.concatenate([self.data, unpackedData], 0)

    def transcribe(self, audio):
        """Transcribe the buffered samples and clear the buffer.

        The buffer is written to ``RECEIVED_DATA_FILE`` as 16-bit PCM at
        ``SAMPLE_RATE`` and the file path is handed to the model.

        Args:
            audio: Unused; kept for interface compatibility. The audio that
                is transcribed is whatever was passed to ``addData``.

        Returns:
            The transcribed text (``result["text"]``).

        Raises:
            RuntimeError: If ``loadModel`` has not been called.
        """
        if self.model is None:
            raise RuntimeError("loadModel() must be called before transcribe()")

        received_data_file = self.RECEIVED_DATA_FILE
        write(received_data_file, self.SAMPLE_RATE, self.data.astype(np.int16))

        # The model is given the file path directly; the earlier explicit
        # torchaudio load/resample produced values that were never used.
        result = self.model.transcribe(received_data_file)
        print("WHISPER1:::", result["text"])
        print("WHISPER2:::", result["segments"])

        # Start a fresh buffer only after a successful transcription.
        self.data = self._emptyBuffer()
        return result["text"]