mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-01-27 21:41:41 +03:00
37 lines
1.1 KiB
Python
Executable File
37 lines
1.1 KiB
Python
Executable File
import whisper
|
|
import numpy as np
|
|
import torchaudio
|
|
from scipy.io.wavfile import write
|
|
|
|
_MODELS = {
|
|
"tiny": "/whisper/tiny.pt",
|
|
"base": "/whisper/base.pt",
|
|
"small": "/whisper/small.pt",
|
|
"medium": "/whisper/medium.pt",
|
|
}
|
|
|
|
|
|
class Whisper():
    """Accumulate audio samples and transcribe them with a Whisper model.

    Samples are appended to an in-memory buffer with addData(). transcribe()
    writes that buffer to a WAV file, runs the loaded model on the file and
    then clears the buffer for the next utterance.
    """

    # Sample rate (Hz) used when the buffer is written out as a WAV file.
    _BUFFER_SAMPLE_RATE = 24000

    def __init__(self):
        self.storedSizeFromTry = 0
        # Create the buffer up front so addData() is usable before
        # loadModel() has been called.
        self.data = self._emptyBuffer()

    @staticmethod
    def _emptyBuffer():
        """Return a fresh buffer holding a single zero sample."""
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        return np.zeros(1).astype(np.float64)

    def loadModel(self, model):
        """Load the Whisper checkpoint registered under ``model`` and reset the buffer.

        Args:
            model: A key of the module-level ``_MODELS`` table
                ("tiny", "base", "small" or "medium").

        Raises:
            KeyError: If ``model`` is not a key of ``_MODELS``.
        """
        # self.model = whisper.load_model(_MODELS[model], device="cpu")
        self.model = whisper.load_model(_MODELS[model])
        self.data = self._emptyBuffer()

    def addData(self, unpackedData):
        """Append ``unpackedData`` (a 1-D sequence of samples) to the buffer."""
        self.data = np.concatenate([self.data, unpackedData], 0)

    def transcribe(self, audio):
        """Transcribe the buffered audio and return the recognised text.

        The buffer is written to ``received_data.wav`` in the current working
        directory as int16 samples, the model transcribes that file, and the
        buffer is cleared afterwards.

        Args:
            audio: Unused; kept so existing callers keep working. The audio
                that is transcribed is whatever was passed to addData().

        Returns:
            The ``"text"`` entry of the model's transcription result.
        """
        received_data_file = "received_data.wav"
        write(
            received_data_file,
            self._BUFFER_SAMPLE_RATE,
            self.data.astype(np.int16),
        )
        # The model is given the file path, so Whisper decodes and resamples
        # the audio itself; no separate resampling step is needed here.
        result = self.model.transcribe(received_data_file)
        print("WHISPER1:::", result["text"])
        print("WHISPER2:::", result["segments"])
        self.data = self._emptyBuffer()
        return result["text"]
|
|
|