# mirror of https://github.com/w-okada/voice-changer.git
# synced 2025-01-26 06:55:02 +03:00
# 37 lines, 1.1 KiB, Python
|
import whisper
|
||
|
import numpy as np
|
||
|
import torchaudio
|
||
|
from scipy.io.wavfile import write
|
||
|
|
||
|
_MODELS = {
|
||
|
"tiny": "/whisper/tiny.pt",
|
||
|
"base": "/whisper/base.pt",
|
||
|
"small": "/whisper/small.pt",
|
||
|
"medium": "/whisper/medium.pt",
|
||
|
}
|
||
|
|
||
|
|
||
|
class Whisper:
    """Thin wrapper around OpenAI Whisper for transcribing buffered PCM audio.

    Audio chunks are accumulated via ``addData`` into ``self.data`` and
    flushed to a 24 kHz WAV file by ``transcribe``, which then runs the
    loaded Whisper model over that file.
    """

    def __init__(self):
        # Bookkeeping counter read by callers; starts at zero.
        self.storedSizeFromTry = 0

    def loadModel(self, model):
        """Load the Whisper checkpoint for *model* (a key of ``_MODELS``)
        and reset the sample buffer.
        """
        # self.model = whisper.load_model(_MODELS[model], device="cpu")
        self.model = whisper.load_model(_MODELS[model])
        # ``np.float`` was removed in NumPy 1.24 — use the builtin float
        # (same dtype: float64).
        self.data = np.zeros(1).astype(float)

    def addData(self, unpackedData):
        """Append a chunk of PCM samples to the internal buffer."""
        self.data = np.concatenate([self.data, unpackedData], 0)

    def transcribe(self, audio):
        """Write the buffered samples to ``received_data.wav`` (24 kHz,
        int16), transcribe that file with the loaded model, reset the
        buffer, and return the transcript text.

        NOTE(review): the *audio* parameter is unused; it is kept for
        interface compatibility with existing callers.
        """
        received_data_file = "received_data.wav"
        write(received_data_file, 24000, self.data.astype(np.int16))
        # Removed dead code: the original reloaded the WAV with torchaudio
        # and resampled it to 16 kHz, but never used the result — Whisper's
        # transcribe() handles resampling of the file itself.
        result = self.model.transcribe(received_data_file)
        print("WHISPER1:::", result["text"])
        print("WHISPER2:::", result["segments"])
        # Reset the buffer for the next utterance (see loadModel for the
        # np.float -> float fix).
        self.data = np.zeros(1).astype(float)
        return result["text"]