From 357f411d91614d6ff60d7308716475763cfa9eee Mon Sep 17 00:00:00 2001 From: wataru Date: Sat, 31 Dec 2022 20:25:28 +0900 Subject: [PATCH] wip --- .gitignore | 6 +- server/MMVCServerSIO.py | 133 +--------------- server/mods/Trainer_Model.py | 13 ++ server/mods/Trainer_Models.py | 23 +++ server/mods/Trainer_MultiSpeakerSetting.py | 26 +++ server/mods/Trainer_Speaker.py | 15 ++ server/mods/Trainer_Speaker_Voice.py | 28 ++++ server/mods/Trainer_Speaker_Voices.py | 22 +++ server/mods/Trainer_Speakers.py | 15 ++ server/mods/Trainer_Training.py | 176 +++++++++++++++++++++ server/mods/VoiceChanger.py | 106 +++++++++++++ server/mods/Whisper.py | 36 +++++ server/mods/ssl.py | 24 +++ server/restapi/MMVC_Rest.py | 6 + server/restapi/MMVC_Rest_Fileuploader.py | 2 +- server/restapi/MMVC_Rest_Trainer.py | 94 +++++++++++ server/restapi/mods/FileUploader.py | 27 ++++ server/trainer_mods/files.py | 24 +++ 18 files changed, 645 insertions(+), 131 deletions(-) create mode 100755 server/mods/Trainer_Model.py create mode 100755 server/mods/Trainer_Models.py create mode 100755 server/mods/Trainer_MultiSpeakerSetting.py create mode 100755 server/mods/Trainer_Speaker.py create mode 100755 server/mods/Trainer_Speaker_Voice.py create mode 100755 server/mods/Trainer_Speaker_Voices.py create mode 100755 server/mods/Trainer_Speakers.py create mode 100755 server/mods/Trainer_Training.py create mode 100755 server/mods/VoiceChanger.py create mode 100755 server/mods/Whisper.py create mode 100755 server/mods/ssl.py create mode 100644 server/restapi/MMVC_Rest_Trainer.py create mode 100755 server/restapi/mods/FileUploader.py create mode 100755 server/trainer_mods/files.py diff --git a/.gitignore b/.gitignore index b02c05d6..fc339b21 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,8 @@ __pycache__ server/upload_dir/ server/MMVC_Trainer/ -server/key \ No newline at end of file +server/key +server/info + +server/memo.txt + diff --git a/server/MMVCServerSIO.py b/server/MMVCServerSIO.py index 2232485d..846ff420 100755 --- a/server/MMVCServerSIO.py +++ b/server/MMVCServerSIO.py @@ -19,47 +19,17 @@ from fastapi.responses import JSONResponse import uvicorn import socketio -from pydantic import BaseModel from typing import Callable - -from mods.Trainer_Speakers import mod_get_speakers -from mods.Trainer_Training import mod_post_pre_training, mod_post_start_training, mod_post_stop_training, mod_get_related_files, mod_get_tail_training_log -from mods.Trainer_Model import mod_get_model, mod_delete_model - -from mods.Trainer_Models import mod_get_models -from mods.Trainer_MultiSpeakerSetting import mod_get_multi_speaker_setting, mod_post_multi_speaker_setting -from mods.Trainer_Speaker_Voice import mod_get_speaker_voice -from mods.Trainer_Speaker_Voices import mod_get_speaker_voices - -from mods.Trainer_Speaker import mod_delete_speaker -from mods.FileUploader import upload_file, concat_file_chunks - from mods.VoiceChanger import VoiceChanger - from mods.ssl import create_self_signed_cert - from voice_changer.VoiceChangerManager import VoiceChangerManager from sio.MMVC_SocketIOApp import MMVC_SocketIOApp - from restapi.MMVC_Rest import MMVC_Rest -from pydantic import BaseModel - -class VoiceModel(BaseModel): - gpu: int - srcId: int - dstId: int - timestamp: int - prefixChunkSize: int - buffer: str - -@dataclass -class ExApplicationInfo(): - external_tensorboard_port: int -exApplitionInfo = ExApplicationInfo(external_tensorboard_port=0) + @@ -105,10 +75,6 @@ args = parser.parse_args() printMessage(f"Phase name:{__name__}", level=2) thisFilename = os.path.basename(__file__)[:-3] - - - - if __name__ == thisFilename or args.colab == True: printMessage(f"PHASE3:{__name__}", level=2) TYPE = args.t @@ -116,9 +82,9 @@ if __name__ == thisFilename or args.colab == True: CONFIG = args.c MODEL = args.m - if os.getenv("EX_TB_PORT"): - EX_TB_PORT = os.environ["EX_TB_PORT"] - exApplitionInfo.external_tensorboard_port = int(EX_TB_PORT) + # if os.getenv("EX_TB_PORT"): + # EX_TB_PORT = os.environ["EX_TB_PORT"] + # exApplitionInfo.external_tensorboard_port = int(EX_TB_PORT) voiceChangerManager = VoiceChangerManager.get_instance() if CONFIG and MODEL: @@ -126,97 +92,6 @@ if __name__ == thisFilename or args.colab == True: app_fastapi = MMVC_Rest.get_instance(voiceChangerManager) app_socketio = MMVC_SocketIOApp.get_instance(app_fastapi, voiceChangerManager) - ############ - # File Uploder - # ########## - # @app_fastapi.post("/load_model_for_train") - # async def post_load_model_for_train( - # modelGFilename: str = Form(...), - # modelGFilenameChunkNum: int = Form(...), - # modelDFilename: str = Form(...), - # modelDFilenameChunkNum: int = Form(...), - # ): - - - - # @app_fastapi.post("/extract_voices") - # async def post_load_model( - # zipFilename: str = Form(...), - # zipFileChunkNum: int = Form(...), - # ): - # zipFilePath = concat_file_chunks( - # UPLOAD_DIR, zipFilename, zipFileChunkNum, UPLOAD_DIR) - # shutil.unpack_archive(zipFilePath, "MMVC_Trainer/dataset/textful/") - # return {"Zip file unpacked": f"{zipFilePath}"} - - ############ - # Voice Changer - # ########## - - # Trainer REST API ※ ColabがTop直下のパスにしかPOSTを投げれないようなので"REST風" - - @app_fastapi.get("/get_speakers") - async def get_speakers(): - return mod_get_speakers() - - @app_fastapi.delete("/delete_speaker") - async def delete_speaker(speaker: str = Form(...)): - return mod_delete_speaker(speaker) - - @app_fastapi.get("/get_speaker_voices") - async def get_speaker_voices(speaker: str): - return mod_get_speaker_voices(speaker) - - @app_fastapi.get("/get_speaker_voice") - async def get_speaker_voices(speaker: str, voice: str): - return mod_get_speaker_voice(speaker, voice) - - @app_fastapi.get("/get_multi_speaker_setting") - async def get_multi_speaker_setting(): - return mod_get_multi_speaker_setting() - - @app_fastapi.post("/post_multi_speaker_setting") - async def post_multi_speaker_setting(setting: str = Form(...)): - return mod_post_multi_speaker_setting(setting) - - @app_fastapi.get("/get_models") - async def get_models(): - return mod_get_models() - - @app_fastapi.get("/get_model") - async def get_model(model: str): - return mod_get_model(model) - - @app_fastapi.delete("/delete_model") - async def delete_model(model: str = Form(...)): - return mod_delete_model(model) - - @app_fastapi.post("/post_pre_training") - async def post_pre_training(batch: int = Form(...)): - return mod_post_pre_training(batch) - - @app_fastapi.post("/post_start_training") - async def post_start_training(enable_finetuning: bool = Form(...),GModel: str = Form(...),DModel: str = Form(...)): - print("POST START TRAINING..") - return mod_post_start_training(enable_finetuning, GModel, DModel) - - @app_fastapi.post("/post_stop_training") - async def post_stop_training(): - print("POST STOP TRAINING..") - return mod_post_stop_training() - - @app_fastapi.get("/get_related_files") - async def get_related_files(): - return mod_get_related_files() - - @app_fastapi.get("/get_tail_training_log") - async def get_tail_training_log(num: int): - return mod_get_tail_training_log(num) - - @app_fastapi.get("/get_ex_application_info") - async def get_ex_application_info(): - json_compatible_item_data = jsonable_encoder(exApplitionInfo) - return JSONResponse(content=json_compatible_item_data) if __name__ == '__mp_main__': printMessage(f"PHASE2:{__name__}", level=2) diff --git a/server/mods/Trainer_Model.py b/server/mods/Trainer_Model.py new file mode 100755 index 00000000..37087f43 --- /dev/null +++ b/server/mods/Trainer_Model.py @@ -0,0 +1,13 @@ + +from fastapi.responses import FileResponse +import os + +def mod_get_model(modelFile:str): + modelPath = os.path.join("MMVC_Trainer/logs", modelFile) + return FileResponse(path=modelPath) + +def mod_delete_model(modelFile:str): + modelPath = os.path.join("MMVC_Trainer/logs", modelFile) + os.unlink(modelPath) + return {"Model deleted": f"{modelFile}"} + diff --git a/server/mods/Trainer_Models.py b/server/mods/Trainer_Models.py new file mode 100755 index 00000000..5b229134 --- /dev/null +++ b/server/mods/Trainer_Models.py @@ -0,0 +1,23 @@ + +from fastapi.responses import JSONResponse +from fastapi.encoders import jsonable_encoder +from trainer_mods.files import get_file_list +import os + +def mod_get_models(): + gModels = get_file_list(f'MMVC_Trainer/logs/G*.pth') + dModels = get_file_list(f'MMVC_Trainer/logs/D*.pth') + configs = get_file_list(f'MMVC_Trainer/logs/config.json') + models = [] + models.extend(gModels) + models.extend(dModels) + models.extend(configs) + models = [ os.path.basename(x) for x in models] + + models = sorted(models) + data = { + "models":models + } + json_compatible_item_data = jsonable_encoder(data) + return JSONResponse(content=json_compatible_item_data) + diff --git a/server/mods/Trainer_MultiSpeakerSetting.py b/server/mods/Trainer_MultiSpeakerSetting.py new file mode 100755 index 00000000..cbeab8cb --- /dev/null +++ b/server/mods/Trainer_MultiSpeakerSetting.py @@ -0,0 +1,26 @@ +from fastapi.responses import JSONResponse +from fastapi.encoders import jsonable_encoder +import os + +MULTI_SPEAKER_SETTING_PATH = "MMVC_Trainer/dataset/multi_speaker_correspondence.txt" +def mod_get_multi_speaker_setting(): + data = {} + if os.path.isfile(MULTI_SPEAKER_SETTING_PATH) == False: + with open(MULTI_SPEAKER_SETTING_PATH, "w") as f: + f.write("") + f.flush() + f.close() + + with open(MULTI_SPEAKER_SETTING_PATH, "r") as f: + setting = f.read() + data["multi_speaker_setting"] = setting + json_compatible_item_data = jsonable_encoder(data) + return JSONResponse(content=json_compatible_item_data) + + +def mod_post_multi_speaker_setting(setting:str): + with open(MULTI_SPEAKER_SETTING_PATH, "w") as f: + f.write(setting) + f.flush() + f.close() + return {"Write Multispeaker setting": f"{setting}"} \ No newline at end of file diff --git a/server/mods/Trainer_Speaker.py b/server/mods/Trainer_Speaker.py new file mode 100755 index 00000000..db2e2519 --- /dev/null +++ b/server/mods/Trainer_Speaker.py @@ -0,0 +1,15 @@ +import shutil +from mods.Trainer_MultiSpeakerSetting import MULTI_SPEAKER_SETTING_PATH + +def mod_delete_speaker(speaker:str): + shutil.rmtree(f"MMVC_Trainer/dataset/textful/{speaker}") + + with open(MULTI_SPEAKER_SETTING_PATH, "r") as f: + setting = f.readlines() + + filtered = filter(lambda x: x.startswith(f"{speaker}|")==False, setting) + with open(MULTI_SPEAKER_SETTING_PATH, "w") as f: + f.writelines(list(filtered)) + f.flush() + f.close() + return {"Speaker deleted": f"{speaker}"} \ No newline at end of file diff --git a/server/mods/Trainer_Speaker_Voice.py b/server/mods/Trainer_Speaker_Voice.py new file mode 100755 index 00000000..0727d7b1 --- /dev/null +++ b/server/mods/Trainer_Speaker_Voice.py @@ -0,0 +1,28 @@ +from fastapi.responses import JSONResponse +from fastapi.encoders import jsonable_encoder +import os, base64 + +def mod_get_speaker_voice(speaker:str, voice:str): + wav_file = f'MMVC_Trainer/dataset/textful/{speaker}/wav/{voice}.wav' + text_file = f'MMVC_Trainer/dataset/textful/{speaker}/text/{voice}.txt' + readable_text_file = f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/{voice}.txt' + + data = {} + if os.path.exists(wav_file): + with open(wav_file, "rb") as f: + wav_data = f.read() + wav_data_base64 = base64.b64encode(wav_data).decode('utf-8') + data["wav"] = wav_data_base64 + + + if os.path.exists(text_file): + with open(text_file, "r") as f: + text_data = f.read() + data["text"] = text_data + + if os.path.exists(readable_text_file): + with open(readable_text_file, "r") as f: + text_data = f.read() + data["readable_text"] = text_data + json_compatible_item_data = jsonable_encoder(data) + return JSONResponse(content=json_compatible_item_data) diff --git a/server/mods/Trainer_Speaker_Voices.py b/server/mods/Trainer_Speaker_Voices.py new file mode 100755 index 00000000..172ed3bd --- /dev/null +++ b/server/mods/Trainer_Speaker_Voices.py @@ -0,0 +1,22 @@ +from fastapi.responses import JSONResponse +from fastapi.encoders import jsonable_encoder +from trainer_mods.files import get_file_list +import os + +def mod_get_speaker_voices(speaker:str): + voices = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/wav/*.wav') + + texts = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/text/*.txt') + + readable_texts = get_file_list(f'MMVC_Trainer/dataset/textful/{speaker}/readable_text/*.txt') + + items = voices + items.extend(texts) + items.extend(readable_texts) + items = [ os.path.splitext(os.path.basename(x))[0] for x in items] + items = sorted(set(items)) + data = { + "voices":items + } + json_compatible_item_data = jsonable_encoder(data) + return JSONResponse(content=json_compatible_item_data) \ No newline at end of file diff --git a/server/mods/Trainer_Speakers.py b/server/mods/Trainer_Speakers.py new file mode 100755 index 00000000..4bc5a48b --- /dev/null +++ b/server/mods/Trainer_Speakers.py @@ -0,0 +1,15 @@ +from fastapi.responses import JSONResponse +from fastapi.encoders import jsonable_encoder +from trainer_mods.files import get_dir_list +import os +# CreateはFileUploaderで実装。 + +def mod_get_speakers(): + os.makedirs("MMVC_Trainer/dataset/textful", exist_ok=True) + speakers = get_dir_list("MMVC_Trainer/dataset/textful/") + + data = { + "speakers":sorted(speakers) + } + json_compatible_item_data = jsonable_encoder(data) + return JSONResponse(content=json_compatible_item_data) diff --git a/server/mods/Trainer_Training.py b/server/mods/Trainer_Training.py new file mode 100755 index 00000000..67f02107 --- /dev/null +++ b/server/mods/Trainer_Training.py @@ -0,0 +1,176 @@ +import subprocess,os +from trainer_mods.files import get_file_list +from fastapi.responses import JSONResponse +from fastapi.encoders import jsonable_encoder + +LOG_DIR = "info" +train_proc = None + +SUCCESS = 0 +ERROR = -1 +### Submodule for Pre train +def sync_exec(cmd:str, log_path:str, cwd=None): + shortCmdStr = cmd[:20] + try: + with open(log_path, 'w') as log_file: + if cwd == None: + proc = subprocess.run(cmd, shell=True, text=True, stdout=log_file, stderr=log_file) + else: + proc = subprocess.run(cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd=cwd) + print(f"{shortCmdStr} returncode:{proc.returncode}") + if proc.returncode != 0: + print(f"{shortCmdStr} exception:") + return (ERROR, f"returncode:{proc.returncode}") + except Exception as e: + print(f"{shortCmdStr} exception:", str(e)) + return (ERROR, str(e)) + return (SUCCESS, "success") + +def sync_exec_with_stdout(cmd:str, log_path:str): + shortCmdStr = cmd[:20] + try: + with open(log_path, 'w') as log_file: + proc = subprocess.run(cmd, shell=True, text=True, stdout=subprocess.PIPE, + stderr=log_file, cwd="MMVC_Trainer") + print(f"STDOUT{shortCmdStr}",proc.stdout) + except Exception as e: + print(f"{shortCmdStr} exception:", str(e)) + return (ERROR, str(e)) + return (SUCCESS, proc.stdout) + + +def create_dataset(): + cmd = "python3 create_dataset_jtalk.py -f train_config -s 24000 -m dataset/multi_speaker_correspondence.txt" + log_file = os.path.join(LOG_DIR, "log_create_dataset_jtalk.txt") + res = sync_exec(cmd, log_file, "MMVC_Trainer") + return res + +def set_batch_size(batch:int): + cmd = "sed -i 's/\"batch_size\": [0-9]*/\"batch_size\": " + str(batch) + "/' MMVC_Trainer/configs/baseconfig.json" + log_file = os.path.join(LOG_DIR, "log_set_batch_size.txt") + res = sync_exec(cmd, log_file) + return res + +def set_dummy_device_count(): + cmd = 'sed -ie "s/torch.cuda.device_count()/1/" MMVC_Trainer/train_ms.py' + log_file = os.path.join(LOG_DIR, "log_set_dummy_device_count.txt") + res = sync_exec(cmd, log_file) + return res + +### Submodule for Train +def exec_training(enable_finetuning:bool, GModel:str, DModel:str): + global train_proc + log_file = os.path.join(LOG_DIR, "training.txt") + + # トレーニング開始確認(二重起動回避) + if train_proc != None: + status = train_proc.poll() + if status != None: + print("Training have ended.", status) + train_proc = None + else: + print("Training have stated.") + return (ERROR, "Training have started") + + try: + with open(log_file, 'w') as log_file: + if enable_finetuning == True: + GModelPath = os.path.join("logs", GModel) # 実行時にcwdを指定しているのでフォルダはlogsでよい。 + DModelPath = os.path.join("logs", DModel) + cmd = f'python3 train_ms.py -c configs/train_config.json -m ./ -fg {GModelPath} -fd {DModelPath}' + else: + cmd = 'python3 train_ms.py -c configs/train_config.json -m ./' + print("exec:",cmd) + train_proc = subprocess.Popen("exec "+cmd, shell=True, text=True, stdout=log_file, stderr=log_file, cwd="MMVC_Trainer") + print("Training stated") + print(f"returncode:{train_proc.returncode}") + except Exception as e: + print("start training exception:", str(e)) + return (ERROR, str(e)) + + return (SUCCESS, "success") + +def stop_training(): + global train_proc + if train_proc == None: + print("Training have not stated.") + return (ERROR, "Training have not stated.") + + status = train_proc.poll() + if status != None: + print("Training have already ended.", status) + train_proc = None + return (ERROR, "Training have already ended. " + status) + else: + train_proc.kill() + print("Training have stoped.") + return (SUCCESS, "success") + +### Main +def mod_post_pre_training(batch:int): + res = set_batch_size(batch) + if res[0] == ERROR: + return {"result":"failed", "detail": f"Preprocess(set_batch_size) failed. {res[1]}"} + + res = set_dummy_device_count() + if res[0] == ERROR: + return {"result":"failed", "detail": f"Preprocess(set_dummy_device_count) failed. {res[1]}"} + + res = create_dataset() + if res[0] == ERROR: + return {"result":"failed", "detail": f"Preprocess failed(create_dataset). {res[1]}"} + + return {"result":"success", "detail": f"Preprocess succeeded. {res[1]}"} + + +def mod_post_start_training(enable_finetuning:str, GModel:str, DModel:str): + print("START_TRAINING:::::::", enable_finetuning, GModel, DModel) + res = exec_training(enable_finetuning, GModel, DModel) + if res[0] == ERROR: + return {"result":"failed", "detail": f"Start training failed. {res[1]}"} + + return {"result":"success", "detail": f"Start training succeeded. {res[1]}"} + +def mod_post_stop_training(): + res = stop_training() + if res[0] == ERROR: + return {"result":"failed", "detail": f"Stop training failed. {res[1]}"} + + return {"result":"success", "detail": f"Stop training succeeded. {res[1]}"} + +### DEBUG +def mod_get_related_files(): + files = get_file_list(os.path.join(LOG_DIR,"*")) + files.extend([ + "MMVC_Trainer/dataset/multi_speaker_correspondence.txt", + "MMVC_Trainer/train_ms.py", + ]) + files.extend( + get_file_list("MMVC_Trainer/configs/*") + ) + + res = [] + for f in files: + size = os.path.getsize(f) + data = "" + if size < 1024*1024: + with open(f, "r") as input: + data = input.read() + + res.append({ + "name":f, + "size":size, + "data":data + }) + + json_compatible_item_data = jsonable_encoder(res) + return JSONResponse(content=json_compatible_item_data) + +def mod_get_tail_training_log(num:int): + training_log_file = os.path.join(LOG_DIR, "training.txt") + res = sync_exec(f"cat {training_log_file} | sed -e 's/.*\r//' > /tmp/out","/dev/null") + cmd = f'tail -n {num} /tmp/out' + res = sync_exec_with_stdout(cmd, "/dev/null") + if res[0] == ERROR: + return {"result":"failed", "detail": f"Tail training log failed. {res[1]}"} + return {"result":"success", "detail":res[1]} diff --git a/server/mods/VoiceChanger.py b/server/mods/VoiceChanger.py new file mode 100755 index 00000000..3ee4808b --- /dev/null +++ b/server/mods/VoiceChanger.py @@ -0,0 +1,106 @@ +import torch + +from scipy.io.wavfile import write, read +import numpy as np +import traceback + +import utils +import commons +from models import SynthesizerTrn + +from text.symbols import symbols +from data_utils import TextAudioSpeakerLoader, TextAudioSpeakerCollate + +from mel_processing import spectrogram_torch +from text import text_to_sequence, cleaned_text_to_sequence + + + +class VoiceChanger(): + def __init__(self, config, model): + self.hps = utils.get_hparams_from_file(config) + self.net_g = SynthesizerTrn( + len(symbols), + self.hps.data.filter_length // 2 + 1, + self.hps.train.segment_size // self.hps.data.hop_length, + n_speakers=self.hps.data.n_speakers, + **self.hps.model) + self.net_g.eval() + self.gpu_num = torch.cuda.device_count() + utils.load_checkpoint(model, self.net_g, None) + + text_norm = text_to_sequence("a", self.hps.data.text_cleaners) + text_norm = commons.intersperse(text_norm, 0) + self.text_norm = torch.LongTensor(text_norm) + self.audio_buffer = torch.zeros(1, 0) + self.prev_audio = np.zeros(1) + self.mps_enabled = getattr( + torch.backends, "mps", None) is not None and torch.backends.mps.is_available() + + print( + f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})") + + def destroy(self): + del self.net_g + + def on_request(self, gpu, srcId, dstId, timestamp, prefixChunkSize, wav): + unpackedData = wav + convertSize = unpackedData.shape[0] + (prefixChunkSize * 512) + + try: + + audio = torch.FloatTensor(unpackedData.astype(np.float32)) + audio_norm = audio / self.hps.data.max_wav_value + audio_norm = audio_norm.unsqueeze(0) + self.audio_buffer = torch.cat( + [self.audio_buffer, audio_norm], axis=1) + audio_norm = self.audio_buffer[:, -convertSize:] + self.audio_buffer = audio_norm + + spec = spectrogram_torch(audio_norm, self.hps.data.filter_length, + self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length, + center=False) + spec = torch.squeeze(spec, 0) + sid = torch.LongTensor([int(srcId)]) + + data = (self.text_norm, spec, audio_norm, sid) + data = TextAudioSpeakerCollate()([data]) + + # if gpu < 0 or (self.gpu_num == 0 and not self.mps_enabled): + if gpu < 0 or self.gpu_num == 0: + with torch.no_grad(): + x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [ + x.cpu() for x in data] + sid_tgt1 = torch.LongTensor([dstId]).cpu() + audio1 = (self.net_g.cpu().voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[ + 0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy() + # elif self.mps_enabled == True: # MPS doesnt support aten::weight_norm_interface, and PYTORCH_ENABLE_MPS_FALLBACK=1 cause a big dely. + # x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [ + # x.to("mps") for x in data] + # sid_tgt1 = torch.LongTensor([dstId]).to("mps") + # audio1 = (self.net_g.to("mps").voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[ + # 0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy() + + else: + with torch.no_grad(): + x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [ + x.cuda(gpu) for x in data] + sid_tgt1 = torch.LongTensor([dstId]).cuda(gpu) + audio1 = (self.net_g.cuda(gpu).voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[ + 0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy() + + # if len(self.prev_audio) > unpackedData.shape[0]: + # prevLastFragment = self.prev_audio[-unpackedData.shape[0]:] + # curSecondLastFragment = audio1[-unpackedData.shape[0]*2:-unpackedData.shape[0]] + # print("prev, cur", prevLastFragment.shape, curSecondLastFragment.shape) + # self.prev_audio = audio1 + # print("self.prev_audio", self.prev_audio.shape) + + audio1 = audio1[-unpackedData.shape[0]*2:] + + except Exception as e: + print("VC PROCESSING!!!! EXCEPTION!!!", e) + print(traceback.format_exc()) + + audio1 = audio1.astype(np.int16) + return audio1 diff --git a/server/mods/Whisper.py b/server/mods/Whisper.py new file mode 100755 index 00000000..da4c7c42 --- /dev/null +++ b/server/mods/Whisper.py @@ -0,0 +1,36 @@ +import whisper +import numpy as np +import torchaudio +from scipy.io.wavfile import write + +_MODELS = { + "tiny": "/whisper/tiny.pt", + "base": "/whisper/base.pt", + "small": "/whisper/small.pt", + "medium": "/whisper/medium.pt", +} + + +class Whisper(): + def __init__(self): + self.storedSizeFromTry = 0 + + def loadModel(self, model): + # self.model = whisper.load_model(_MODELS[model], device="cpu") + self.model = whisper.load_model(_MODELS[model]) + self.data = np.zeros(1).astype(np.float) + + def addData(self, unpackedData): + self.data = np.concatenate([self.data, unpackedData], 0) + + def transcribe(self, audio): + received_data_file = "received_data.wav" + write(received_data_file, 24000, self.data.astype(np.int16)) + source, sr = torchaudio.load(received_data_file) + target = torchaudio.functional.resample(source, 24000, 16000) + result = self.model.transcribe(received_data_file) + print("WHISPER1:::", result["text"]) + print("WHISPER2:::", result["segments"]) + self.data = np.zeros(1).astype(np.float) + return result["text"] + diff --git a/server/mods/ssl.py b/server/mods/ssl.py new file mode 100755 index 00000000..88a55238 --- /dev/null +++ b/server/mods/ssl.py @@ -0,0 +1,24 @@ +import os +from OpenSSL import crypto + +def create_self_signed_cert(certfile, keyfile, certargs, cert_dir="."): + C_F = os.path.join(cert_dir, certfile) + K_F = os.path.join(cert_dir, keyfile) + if not os.path.exists(C_F) or not os.path.exists(K_F): + k = crypto.PKey() + k.generate_key(crypto.TYPE_RSA, 2048) + cert = crypto.X509() + cert.get_subject().C = certargs["Country"] + cert.get_subject().ST = certargs["State"] + cert.get_subject().L = certargs["City"] + cert.get_subject().O = certargs["Organization"] + cert.get_subject().OU = certargs["Org. Unit"] + cert.get_subject().CN = 'Example' + cert.set_serial_number(1000) + cert.gmtime_adj_notBefore(0) + cert.gmtime_adj_notAfter(315360000) + cert.set_issuer(cert.get_subject()) + cert.set_pubkey(k) + cert.sign(k, 'sha1') + open(C_F, "wb").write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert)) + open(K_F, "wb").write(crypto.dump_privatekey(crypto.FILETYPE_PEM, k)) \ No newline at end of file diff --git a/server/restapi/MMVC_Rest.py b/server/restapi/MMVC_Rest.py index ec0da4f8..a92c1b48 100644 --- a/server/restapi/MMVC_Rest.py +++ b/server/restapi/MMVC_Rest.py @@ -8,6 +8,10 @@ from voice_changer.VoiceChangerManager import VoiceChangerManager from restapi.MMVC_Rest_Hello import MMVC_Rest_Hello from restapi.MMVC_Rest_VoiceChanger import MMVC_Rest_VoiceChanger from restapi.MMVC_Rest_Fileuploader import MMVC_Rest_Fileuploader +from restapi.MMVC_Rest_Trainer import MMVC_Rest_Trainer + + + class ValidationErrorLoggingRoute(APIRoute): def get_route_handler(self) -> Callable: original_route_handler = super().get_route_handler() @@ -53,6 +57,8 @@ class MMVC_Rest: app_fastapi.include_router(restVoiceChanger.router) fileUploader = MMVC_Rest_Fileuploader(voiceChangerManager) app_fastapi.include_router(fileUploader.router) + trainer = MMVC_Rest_Trainer() + app_fastapi.include_router(trainer.router) cls._instance = app_fastapi return cls._instance diff --git a/server/restapi/MMVC_Rest_Fileuploader.py b/server/restapi/MMVC_Rest_Fileuploader.py index a305d820..b8c93043 100644 --- a/server/restapi/MMVC_Rest_Fileuploader.py +++ b/server/restapi/MMVC_Rest_Fileuploader.py @@ -5,7 +5,7 @@ from fastapi.encoders import jsonable_encoder from fastapi.responses import JSONResponse from fastapi import HTTPException, FastAPI, UploadFile, File, Form -from mods.FileUploader import upload_file, concat_file_chunks +from restapi.mods.FileUploader import upload_file, concat_file_chunks from voice_changer.VoiceChangerManager import VoiceChangerManager UPLOAD_DIR = "upload_dir" diff --git a/server/restapi/MMVC_Rest_Trainer.py b/server/restapi/MMVC_Rest_Trainer.py new file mode 100644 index 00000000..73782f57 --- /dev/null +++ b/server/restapi/MMVC_Rest_Trainer.py @@ -0,0 +1,94 @@ +import os + +from fastapi import APIRouter,Form +from fastapi.encoders import jsonable_encoder +from fastapi.responses import JSONResponse + + +from mods.Trainer_Speakers import mod_get_speakers +from mods.Trainer_Training import mod_post_pre_training, mod_post_start_training, mod_post_stop_training, mod_get_related_files, mod_get_tail_training_log +from mods.Trainer_Model import mod_get_model, mod_delete_model + +from mods.Trainer_Models import mod_get_models +from mods.Trainer_MultiSpeakerSetting import mod_get_multi_speaker_setting, mod_post_multi_speaker_setting +from mods.Trainer_Speaker_Voice import mod_get_speaker_voice +from mods.Trainer_Speaker_Voices import mod_get_speaker_voices + +from mods.Trainer_Speaker import mod_delete_speaker +from dataclasses import dataclass + +INFO_DIR = "info" +os.makedirs(INFO_DIR, exist_ok=True) + +@dataclass +class ExApplicationInfo(): + external_tensorboard_port: int + +exApplitionInfo = ExApplicationInfo(external_tensorboard_port=0) + +class MMVC_Rest_Trainer: + def __init__(self): + self.router = APIRouter() + self.router.add_api_route("/get_speakers", self.get_speakers, methods=["GET"]) + self.router.add_api_route("/delete_speaker", self.delete_speaker, methods=["DELETE"]) + self.router.add_api_route("/get_speaker_voices", self.get_speaker_voices, methods=["GET"]) + self.router.add_api_route("/get_speaker_voice", self.get_speaker_voice, methods=["GET"]) + self.router.add_api_route("/get_multi_speaker_setting", self.get_multi_speaker_setting, methods=["GET"]) + self.router.add_api_route("/post_multi_speaker_setting", self.post_multi_speaker_setting, methods=["POST"]) + self.router.add_api_route("/get_models", self.get_models, methods=["GET"]) + self.router.add_api_route("/get_model", self.get_model, methods=["GET"]) + self.router.add_api_route("/delete_model", self.delete_model, methods=["DELETE"]) + self.router.add_api_route("/post_pre_training", self.post_pre_training, methods=["POST"]) + self.router.add_api_route("/post_start_training", self.post_start_training, methods=["POST"]) + self.router.add_api_route("/post_stop_training", self.post_stop_training, methods=["POST"]) + self.router.add_api_route("/get_related_files", self.get_related_files, methods=["GET"]) + self.router.add_api_route("/get_tail_training_log", self.get_tail_training_log, methods=["GET"]) + self.router.add_api_route("/get_ex_application_info", self.get_ex_application_info, methods=["GET"]) + + def get_speakers(self): + return mod_get_speakers() + + def delete_speaker(self, speaker: str = Form(...)): + return mod_delete_speaker(speaker) + + def get_speaker_voices(self, speaker: str): + return mod_get_speaker_voices(speaker) + + def get_speaker_voice(self, speaker: str, voice: str): + return mod_get_speaker_voice(speaker, voice) + + def get_multi_speaker_setting(self): + return mod_get_multi_speaker_setting() + + def post_multi_speaker_setting(self, setting: str = Form(...)): + return mod_post_multi_speaker_setting(setting) + + def get_models(self): + return mod_get_models() + + def get_model(self, model: str): + return mod_get_model(model) + + def delete_model(self, model: str = Form(...)): + return mod_delete_model(model) + + def post_pre_training(self, batch: int = Form(...)): + return mod_post_pre_training(batch) + + def post_start_training(self, enable_finetuning: bool = Form(...),GModel: str = Form(...),DModel: str = Form(...)): + print("POST START TRAINING..") + return mod_post_start_training(enable_finetuning, GModel, DModel) + + def post_stop_training(self): + print("POST STOP TRAINING..") + return mod_post_stop_training() + + def get_related_files(self): + return mod_get_related_files() + + def get_tail_training_log(self, num: int): + return mod_get_tail_training_log(num) + + def get_ex_application_info(self): + json_compatible_item_data = jsonable_encoder(exApplitionInfo) + return JSONResponse(content=json_compatible_item_data) diff --git a/server/restapi/mods/FileUploader.py b/server/restapi/mods/FileUploader.py new file mode 100755 index 00000000..74d04a9f --- /dev/null +++ b/server/restapi/mods/FileUploader.py @@ -0,0 +1,27 @@ +import os, shutil +from fastapi import UploadFile + +# UPLOAD_DIR = "model_upload_dir" + +def upload_file(upload_dirname:str, file:UploadFile, filename: str): + if file and filename: + fileobj = file.file + upload_dir = open(os.path.join(upload_dirname, filename),'wb+') + shutil.copyfileobj(fileobj, upload_dir) + upload_dir.close() + return {"uploaded files": f"{filename} "} + return {"Error": "uploaded file is not found."} + +def concat_file_chunks(upload_dirname:str, filename:str, chunkNum:int, dest_dirname:str): + target_file_name = os.path.join(dest_dirname, filename) + with open(target_file_name, "ab") as target_file: + for i in range(chunkNum): + chunkName = f"{filename}_{i}" + chunk_file_path = os.path.join(upload_dirname, chunkName) + stored_chunk_file = open(chunk_file_path, 'rb') + target_file.write(stored_chunk_file.read()) + stored_chunk_file.close() + os.unlink(chunk_file_path) + target_file.close() + return target_file_name + diff --git a/server/trainer_mods/files.py b/server/trainer_mods/files.py new file mode 100755 index 00000000..2aa1c147 --- /dev/null +++ b/server/trainer_mods/files.py @@ -0,0 +1,24 @@ +import os,glob + + +def get_file_list(top_dir): + for root, dirs, files in os.walk(top_dir): + for dir in dirs: + dirPath = os.path.join(root, dir) + print(f'dirPath = {dirPath}') + + for file in files: + filePath = os.path.join(root, file) + print(f'filePath = {filePath}') + + +def get_dir_list(top_dir): + dirlist = [] + files = os.listdir(top_dir) + for filename in files: + if os.path.isdir(os.path.join(top_dir, filename)): + dirlist.append(filename) + return dirlist + +def get_file_list(top_dir): + return glob.glob(top_dir) \ No newline at end of file