import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
import logging
from logging.config import dictConfig
import os, sys, math, base64, struct, traceback, time
import torch, torchaudio
import numpy as np
from scipy.io.wavfile import write, read
from datetime import datetime

# Command-line arguments: <port> <mode> (e.g. "8080 colab")
args = sys.argv
PORT = args[1]
MODE = args[2]

logger = logging.getLogger('uvicorn')

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

if MODE == "colab":
    # Colab: serve the built frontend and download the soft-VC models via torch.hub.
    print("ENV: colab")
    app.mount("/front", StaticFiles(directory="../frontend/dist", html=True), name="static")
    hubert_model = torch.hub.load("bshall/hubert:main", "hubert_soft").cuda()
    acoustic_model = torch.hub.load("bshall/acoustic-model:main", "hubert_soft").cuda()
    hifigan_model = torch.hub.load("bshall/hifigan:main", "hifigan_hubert_soft").cuda()
else:
    # Docker: the model code is bundled in the image; put it on sys.path so that
    # torch.load can unpickle the locally stored checkpoints.
    print("ENV: Docker")
    app.mount("/front", StaticFiles(directory="../frontend/dist", html=True), name="static")
    sys.path.append("/hubert")
    from hubert import hubert_discrete, hubert_soft, kmeans100
    sys.path.append("/acoustic-model")
    from acoustic import hubert_discrete, hubert_soft
    sys.path.append("/hifigan")
    from hifigan import hifigan
    hubert_model = torch.load("/models/bshall_hubert_main.pt").cuda()
    acoustic_model = torch.load("/models/bshall_acoustic-model_main.pt").cuda()
    hifigan_model = torch.load("/models/bshall_hifigan_main.pt").cuda()


def applyVol(i, chunk, vols):
    # Gate chunk i by its recorded volume: mute it (multiply by zeros) when the
    # volume is effectively silent, otherwise pass it through unchanged, then
    # add a batch dimension.
    curVol = vols[i] / 2
    if curVol < 0.0001:
        line = torch.zeros(chunk.size())
    else:
        line = torch.ones(chunk.size())

    volApplied = torch.mul(line, chunk)
    volApplied = volApplied.unsqueeze(0)
    return volApplied


@app.get("/test")
def get_test(query: str = ""):
    # Simple echo endpoint: returns the "query" query parameter
    # (declared as a FastAPI query parameter instead of Flask-style request.args).
    try:
        return query
    except Exception as e:
        print("REQUEST PROCESSING!!!! EXCEPTION!!!", e)
        print(traceback.format_exc())
        return str(e)


class VoiceModel(BaseModel):
    gpu: int
    srcId: int
    dstId: int
    timestamp: int
    buffer: str  # base64-encoded 16-bit PCM audio


@app.post("/test")
def post_test(voice: VoiceModel):
    try:
        print("POST REQUEST PROCESSING....")
        gpu = voice.gpu
        srcId = voice.srcId
        dstId = voice.dstId
        timestamp = voice.timestamp
        buffer = voice.buffer
        # Decode the base64 payload and unpack it as little-endian int16 samples.
        wav = base64.b64decode(buffer)
        unpackedData = np.array(struct.unpack('<%sh' % (len(wav) // struct.calcsize('<h')), wav))