hubert/content vec uploader

2025-02-02 16:23:58 +03:00 · 2023-03-13 21:07:35 +09:00 · 2023-03-13 21:07:35 +09:00 · d5cc5c9e28
commit d5cc5c9e28
parent dd30c70f87
12 changed files with 1001 additions and 82 deletions
--- a/client/demo_so-vits-svc_40v2/dist/index.html
+++ b/client/demo_so-vits-svc_40v2/dist/index.html
@ -1 +1,10 @@
-<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
+<!DOCTYPE html>
+<html style="width: 100%; height: 100%; overflow: hidden">
+    <head>
+        <meta charset="utf-8" />
+        <title>Voice Changer Client Demo</title>
+    <script defer src="index.js"></script></head>
+    <body style="width: 100%; height: 100%; margin: 0px">
+        <div id="app" style="width: 100%; height: 100%"></div>
+    </body>
+</html>
--- a/client/demo_so-vits-svc_40v2/dist/index.js
+++ b/client/demo_so-vits-svc_40v2/dist/index.js
--- a/client/demo_so-vits-svc_40v2/dist/index.js.LICENSE.txt
+++ b/client/demo_so-vits-svc_40v2/dist/index.js.LICENSE.txt
@ -1,31 +0,0 @@
-/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
-
-/**
- * @license React
- * react-dom.production.min.js
- *
- * Copyright (c) Facebook, Inc. and its affiliates.
- *
- * This source code is licensed under the MIT license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-/**
- * @license React
- * react.production.min.js
- *
- * Copyright (c) Facebook, Inc. and its affiliates.
- *
- * This source code is licensed under the MIT license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-/**
- * @license React
- * scheduler.production.min.js
- *
- * Copyright (c) Facebook, Inc. and its affiliates.
- *
- * This source code is licensed under the MIT license found in the
- * LICENSE file in the root directory of this source tree.
- */
--- a/client/demo_so-vits-svc_40v2/src/102_model_setting.tsx
+++ b/client/demo_so-vits-svc_40v2/src/102_model_setting.tsx
@ -64,26 +64,48 @@ export const useModelSettingArea = (): ServerSettingState => {
                configFile: null
            })
        }
-        const onOnnxFileLoadClicked = async () => {
+
+        // Let the user pick the HuBERT / content-vec checkpoint and stage it for upload.
+        // Both ".pt" and ".pth" are accepted: the checkpoint path the server used before
+        // this change ("hubert/checkpoint_best_legacy_500.pt") carries a ".pt" extension,
+        // so a ".pth"-only check would reject that very file.
+        const onHubertFileLoadClicked = async () => {
            const file = await fileSelector("")
-            if (file.name.endsWith(".onnx") == false) {
-                alert("モデルファイルの拡張子はonnxである必要があります。")
+            if (!file.name.endsWith(".pt") && !file.name.endsWith(".pth")) {
+                alert("モデルファイルの拡張子はptまたはpthである必要があります。")
                return
            }
            appState.serverSetting.setFileUploadSetting({
                ...appState.serverSetting.fileUploadSetting,
-                onnxModel: {
+                hubertTorchModel: {
                    file: file
                }
            })
        }
-        const onOnnxFileClearClicked = () => {
+
+        // Clear the staged HuBERT checkpoint: hubertTorchModel is reset to null while every
+        // other entry of fileUploadSetting is carried over unchanged via the spread.
+        const onHubertFileClearClicked = () => {
            appState.serverSetting.setFileUploadSetting({
                ...appState.serverSetting.fileUploadSetting,
-                onnxModel: null
+                hubertTorchModel: null
            })
        }

+        // const onOnnxFileLoadClicked = async () => {
+        //     const file = await fileSelector("")
+        //     if (file.name.endsWith(".onnx") == false) {
+        //         alert("モデルファイルの拡張子はonnxである必要があります。")
+        //         return
+        //     }
+        //     appState.serverSetting.setFileUploadSetting({
+        //         ...appState.serverSetting.fileUploadSetting,
+        //         onnxModel: {
+        //             file: file
+        //         }
+        //     })
+        // }
+        // const onOnnxFileClearClicked = () => {
+        //     appState.serverSetting.setFileUploadSetting({
+        //         ...appState.serverSetting.fileUploadSetting,
+        //         onnxModel: null
+        //     })
+        // }
+
        const onModelUploadClicked = async () => {
            appState.serverSetting.loadModel()
        }
@ -93,7 +115,9 @@ export const useModelSettingArea = (): ServerSettingState => {
        const uploadButtonLabel = appState.serverSetting.isUploading ? "wait..." : "upload"

        const configFilenameText = appState.serverSetting.fileUploadSetting.configFile?.filename || appState.serverSetting.fileUploadSetting.configFile?.file?.name || ""
-        const onnxModelFilenameText = appState.serverSetting.fileUploadSetting.onnxModel?.filename || appState.serverSetting.fileUploadSetting.onnxModel?.file?.name || ""
+        const hubertModelFilenameText = appState.serverSetting.fileUploadSetting.hubertTorchModel?.filename || appState.serverSetting.fileUploadSetting.hubertTorchModel?.file?.name || ""
+        // const onnxModelFilenameText = appState.serverSetting.fileUploadSetting.onnxModel?.filename || appState.serverSetting.fileUploadSetting.onnxModel?.file?.name || ""
+
        const pyTorchFilenameText = appState.serverSetting.fileUploadSetting.pyTorchModel?.filename || appState.serverSetting.fileUploadSetting.pyTorchModel?.file?.name || ""

        const uploadingStatus = appState.serverSetting.isUploading ?
@ -125,6 +149,16 @@ export const useModelSettingArea = (): ServerSettingState => {
                        <div className="body-button left-margin-1" onClick={onConfigFileClearClicked}>clear</div>
                    </div>
                </div>
+                <div className="body-row split-3-3-4 left-padding-1 guided">
+                    <div className="body-item-title left-padding-2">hubert(.pth)</div>
+                    <div className="body-item-text">
+                        <div>{hubertModelFilenameText}</div>
+                    </div>
+                    <div className="body-button-container">
+                        <div className="body-button" onClick={onHubertFileLoadClicked}>select</div>
+                        <div className="body-button left-margin-1" onClick={onHubertFileClearClicked}>clear</div>
+                    </div>
+                </div>
                {/* <div className="body-row split-3-3-4 left-padding-1 guided">
                    <div className="body-item-title left-padding-2">Onnx(.onnx)</div>
                    <div className="body-item-text">
--- a/client/lib/src/ServerConfigurator.ts
+++ b/client/lib/src/ServerConfigurator.ts
@ -109,13 +109,15 @@ export class ServerConfigurator {
        })
    }

-    loadModel = async (configFilename: string, pyTorchModelFilename: string | null, onnxModelFilename: string | null) => {
+    loadModel = async (configFilename: string, pyTorchModelFilename: string | null, onnxModelFilename: string | null, hubertTorchModelFilename: string | null) => {
        const url = this.serverUrl + "/load_model"
        const info = new Promise<ServerInfo>(async (resolve) => {
            const formData = new FormData();
            formData.append("pyTorchModelFilename", pyTorchModelFilename || "-");
            formData.append("onnxModelFilename", onnxModelFilename || "-");
            formData.append("configFilename", configFilename);
+            formData.append("hubertTorchModelFilename", hubertTorchModelFilename || "-");
+
            const request = new Request(url, {
                method: 'POST',
                body: formData,
--- a/client/lib/src/VoiceChangerClient.ts
+++ b/client/lib/src/VoiceChangerClient.ts
@ -243,8 +243,8 @@ export class VoiceChangerClient {
    concatUploadedFile = (filename: string, chunkNum: number) => {
        return this.configurator.concatUploadedFile(filename, chunkNum)
    }
-    loadModel = (configFilename: string, pyTorchModelFilename: string | null, onnxModelFilename: string | null) => {
-        return this.configurator.loadModel(configFilename, pyTorchModelFilename, onnxModelFilename)
+    /**
+     * Ask the server to load a model from the given file names.
+     * Thin pass-through: all four arguments are forwarded unchanged to
+     * `this.configurator.loadModel`, and its result is returned as-is.
+     *
+     * @param configFilename file name of the config file
+     * @param pyTorchModelFilename file name of the PyTorch model, or null when none is used
+     * @param onnxModelFilename file name of the ONNX model, or null when none is used
+     * @param hubertTorchModelFilename file name of the HuBERT (content vec) model, or null when none is used
+     * @returns the value returned by `configurator.loadModel`
+     */
+    loadModel = (configFilename: string, pyTorchModelFilename: string | null, onnxModelFilename: string | null, hubertTorchModelFilename: string | null) => {
+        return this.configurator.loadModel(configFilename, pyTorchModelFilename, onnxModelFilename, hubertTorchModelFilename)
    }

    //##  Worklet ##//
--- a/client/lib/src/hooks/useServerSetting.ts
+++ b/client/lib/src/hooks/useServerSetting.ts
@ -14,12 +14,14 @@ export type FileUploadSetting = {
    pyTorchModel: ModelData | null
    onnxModel: ModelData | null
    configFile: ModelData | null
+    hubertTorchModel: ModelData | null
 }

 const InitialFileUploadSetting: FileUploadSetting = {
    pyTorchModel: null,
    configFile: null,
    onnxModel: null,
+    hubertTorchModel: null
 }

 export type UseServerSettingProps = {
@ -144,6 +146,10 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
                alert("Configファイルを指定する必要があります。")
                return
            }
+            if (props.clientType == "so_vits_svc_40v2c" && !fileUploadSetting.hubertTorchModel) {
+                alert("content vecのファイルを指定する必要があります。")
+                return
+            }
            if (!props.voiceChangerClient) return

            setUploadProgress(0)
@ -162,9 +168,13 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
                fileUploadSetting.configFile.data = await fileUploadSetting.configFile.file!.arrayBuffer()
                fileUploadSetting.configFile.filename = await fileUploadSetting.configFile.file!.name
            }
+            if (props.clientType == "so_vits_svc_40v2c" && !fileUploadSetting.hubertTorchModel!.data) {
+                fileUploadSetting.hubertTorchModel!.data = await fileUploadSetting.hubertTorchModel!.file!.arrayBuffer()
+                fileUploadSetting.hubertTorchModel!.filename = await fileUploadSetting.hubertTorchModel!.file!.name
+            }

            // ファイルをサーバにアップロード
-            const models = [fileUploadSetting.onnxModel, fileUploadSetting.pyTorchModel].filter(x => { return x != null }) as ModelData[]
+            const models = [fileUploadSetting.onnxModel, fileUploadSetting.pyTorchModel, fileUploadSetting.hubertTorchModel].filter(x => { return x != null }) as ModelData[]
            for (let i = 0; i < models.length; i++) {
                const progRate = 1 / models.length
                const progOffset = 100 * i * progRate
@ -178,13 +188,17 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
                console.log(progress, end)
            })

-            const loadPromise = props.voiceChangerClient.loadModel(fileUploadSetting.configFile.filename!, fileUploadSetting.pyTorchModel?.filename || null, fileUploadSetting.onnxModel?.filename || null)
+            const loadPromise = props.voiceChangerClient.loadModel(fileUploadSetting.configFile.filename!, fileUploadSetting.pyTorchModel?.filename || null, fileUploadSetting.onnxModel?.filename || null, fileUploadSetting.hubertTorchModel?.filename || null)

            // サーバでロード中にキャッシュにセーブ
            const saveData: FileUploadSetting = {
                pyTorchModel: fileUploadSetting.pyTorchModel ? { data: fileUploadSetting.pyTorchModel.data, filename: fileUploadSetting.pyTorchModel.filename } : null,
                onnxModel: fileUploadSetting.onnxModel ? { data: fileUploadSetting.onnxModel.data, filename: fileUploadSetting.onnxModel.filename } : null,
-                configFile: { data: fileUploadSetting.configFile.data, filename: fileUploadSetting.configFile.filename }
+                configFile: { data: fileUploadSetting.configFile.data, filename: fileUploadSetting.configFile.filename },
+                hubertTorchModel: fileUploadSetting.hubertTorchModel ? {
+                    data: fileUploadSetting.hubertTorchModel.data, filename: fileUploadSetting.hubertTorchModel.filename
+                } : null
+
            }
            setItem(INDEXEDDB_KEY_MODEL_DATA, saveData)

@ -193,7 +207,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
            setIsUploading(false)
            reloadServerInfo()
        }
-    }, [fileUploadSetting, props.voiceChangerClient])
+    }, [fileUploadSetting, props.voiceChangerClient, props.clientType])

    const reloadServerInfo = useMemo(() => {
        return async () => {
--- a/server/MMVCServerSIO.py
+++ b/server/MMVCServerSIO.py
@ -39,6 +39,7 @@ def setupArgParser():
                        default=False, help="run on colab")
    parser.add_argument("--modelType", type=str,
                        default="MMVCv15", help="model type: MMVCv13, MMVCv15, so-vits-svc-40v2")
+    parser.add_argument("--hubert", type=str, help="path to hubert model")

    return parser

@ -79,6 +80,7 @@ PORT = args.p
 CONFIG = args.c
 MODEL = args.m if args.m != None else None
 ONNX_MODEL = args.o if args.o != None else None
+HUBERT_MODEL = args.hubert if args.hubert != None else None
 MODEL_TYPE = os.environ.get('MODEL_TYPE', None)
 if MODEL_TYPE == None:
    MODEL_TYPE = args.modelType
@ -100,7 +102,11 @@ if args.colab == True:

 voiceChangerManager = VoiceChangerManager.get_instance()
 if CONFIG and (MODEL or ONNX_MODEL):
-    voiceChangerManager.loadModel(CONFIG, MODEL, ONNX_MODEL)
+    if MODEL_TYPE == "MMVCv15" or MODEL_TYPE == "MMVCv13":
+        voiceChangerManager.loadModel(CONFIG, MODEL, ONNX_MODEL, None)
+    else:
+        voiceChangerManager.loadModel(CONFIG, MODEL, ONNX_MODEL, HUBERT_MODEL)
+
 app_fastapi = MMVC_Rest.get_instance(voiceChangerManager)
 app_socketio = MMVC_SocketIOApp.get_instance(app_fastapi, voiceChangerManager)

--- a/server/restapi/MMVC_Rest_Fileuploader.py
+++ b/server/restapi/MMVC_Rest_Fileuploader.py
@ -1,4 +1,5 @@
-import os,shutil
+import os
+import shutil
 from typing import Union
 from fastapi import APIRouter
 from fastapi.encoders import jsonable_encoder
@ -8,23 +9,24 @@ from fastapi import HTTPException, FastAPI, UploadFile, File, Form
 from restapi.mods.FileUploader import upload_file, concat_file_chunks
 from voice_changer.VoiceChangerManager import VoiceChangerManager

-from const import MODEL_DIR ,UPLOAD_DIR
+from const import MODEL_DIR, UPLOAD_DIR
 os.makedirs(UPLOAD_DIR, exist_ok=True)
 os.makedirs(MODEL_DIR, exist_ok=True)

+
 class MMVC_Rest_Fileuploader:
-    def __init__(self, voiceChangerManager:VoiceChangerManager):
+    def __init__(self, voiceChangerManager: VoiceChangerManager):
+        """Store the voice changer manager and register this router's REST endpoints.
+
+        Args:
+            voiceChangerManager: manager instance the endpoint handlers delegate to.
+        """
        self.voiceChangerManager = voiceChangerManager
        self.router = APIRouter()
        self.router.add_api_route("/info", self.get_info, methods=["GET"])
        self.router.add_api_route("/upload_file", self.post_upload_file, methods=["POST"])
        self.router.add_api_route("/concat_uploaded_file", self.post_concat_uploaded_file, methods=["POST"])
-        self.router.add_api_route("/update_setteings",self.post_update_setteings, methods=["POST"])
+        # NOTE(review): "setteings" is misspelled, but it is the registered route name;
+        # presumably clients call it by this exact path, so do not rename it casually.
+        self.router.add_api_route("/update_setteings", self.post_update_setteings, methods=["POST"])
        self.router.add_api_route("/load_model", self.post_load_model, methods=["POST"])
        self.router.add_api_route("/load_model_for_train", self.post_load_model_for_train, methods=["POST"])
        self.router.add_api_route("/extract_voices", self.post_extract_voices, methods=["POST"])

-        self.onnx_provider=""
+        self.onnx_provider = ""

    def post_upload_file(self, file: UploadFile = File(...), filename: str = Form(...)):
        res = upload_file(UPLOAD_DIR, file, filename)
@ -41,7 +43,7 @@ class MMVC_Rest_Fileuploader:
        json_compatible_item_data = jsonable_encoder(info)
        return JSONResponse(content=json_compatible_item_data)

-    def post_update_setteings(self, key:str=Form(...), val:Union[int, str, float]=Form(...)):
+    def post_update_setteings(self, key: str = Form(...), val: Union[int, str, float] = Form(...)):
        print("post_update_setteings", key, val)
        info = self.voiceChangerManager.update_setteings(key, val)
        json_compatible_item_data = jsonable_encoder(info)
@ -51,19 +53,20 @@ class MMVC_Rest_Fileuploader:
        self,
        pyTorchModelFilename: str = Form(...),
        onnxModelFilename: str = Form(...),
-        configFilename: str = Form(...)
+        configFilename: str = Form(...),
+        hubertTorchModelFilename: str = Form(...)
    ):
-
+        print("Hubert:", hubertTorchModelFilename)
        pyTorchModelFilePath = os.path.join(UPLOAD_DIR, pyTorchModelFilename) if pyTorchModelFilename != "-" else None
        onnxModelFilePath = os.path.join(UPLOAD_DIR, onnxModelFilename) if onnxModelFilename != "-" else None
        configFilePath = os.path.join(UPLOAD_DIR, configFilename)
+        hubertTorchModelFilePath = os.path.join(UPLOAD_DIR, hubertTorchModelFilename) if hubertTorchModelFilename != "-" else None

-        info = self.voiceChangerManager.loadModel(configFilePath, pyTorchModelFilePath, onnxModelFilePath)
+        info = self.voiceChangerManager.loadModel(configFilePath, pyTorchModelFilePath, onnxModelFilePath, hubertTorchModelFilePath)
        json_compatible_item_data = jsonable_encoder(info)
        return JSONResponse(content=json_compatible_item_data)
        # return {"load": f"{configFilePath}, {pyTorchModelFilePath}, {onnxModelFilePath}"}

-
    def post_load_model_for_train(
        self,
        modelGFilename: str = Form(...),
@ -86,4 +89,3 @@ class MMVC_Rest_Fileuploader:
            UPLOAD_DIR, zipFilename, zipFileChunkNum, UPLOAD_DIR)
        shutil.unpack_archive(zipFilePath, "MMVC_Trainer/dataset/textful/")
        return {"Zip file unpacked": f"{zipFilePath}"}
-        
--- a/server/voice_changer/SoVitsSvc40v2/SoVitsSvc40v2.py
+++ b/server/voice_changer/SoVitsSvc40v2/SoVitsSvc40v2.py
@ -58,12 +58,13 @@ class SoVitsSvc40v2:
        self.gpu_num = torch.cuda.device_count()
        self.prevVol = 0

-    def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None):
+    def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None, hubertTorchModel: str = None):
        self.settings.configFile = config
        self.hps = utils.get_hparams_from_file(config)

        # hubert model
-        vec_path = "hubert/checkpoint_best_legacy_500.pt"
+        # vec_path = "hubert/checkpoint_best_legacy_500.pt"
+        vec_path = hubertTorchModel
        models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
            [vec_path],
            suffix="",
--- a/server/voice_changer/VoiceChanger.py
+++ b/server/voice_changer/VoiceChanger.py
@ -48,15 +48,15 @@ class VoiceChanger():
        self.currentCrossFadeOverlapSize = 0  # setting
        self.crossfadeSize = 0  # calculated

-        modelType = getModelType()
-        print("[VoiceChanger] activate model type:", modelType)
-        if modelType == "MMVCv15":
+        self.modelType = getModelType()
+        print("[VoiceChanger] activate model type:", self.modelType)
+        if self.modelType == "MMVCv15":
            from voice_changer.MMVCv15.MMVCv15 import MMVCv15
            self.voiceChanger = MMVCv15()
-        elif modelType == "MMVCv13":
+        elif self.modelType == "MMVCv13":
            from voice_changer.MMVCv13.MMVCv13 import MMVCv13
            self.voiceChanger = MMVCv13()
-        elif modelType == "so-vits-svc-40v2":
+        elif self.modelType == "so-vits-svc-40v2":
            from voice_changer.SoVitsSvc40v2.SoVitsSvc40v2 import SoVitsSvc40v2
            self.voiceChanger = SoVitsSvc40v2()

@ -70,8 +70,11 @@ class VoiceChanger():

        print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")

-    def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None):
+    def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None, hubertTorchModel: str = None):
+        """Load a model through the backend held in ``self.voiceChanger``.
+
+        For model types "MMVCv15" and "MMVCv13" the backend is called without the
+        HuBERT argument; for any other model type ``hubertTorchModel`` is passed
+        through as a fourth argument.
+
+        Args:
+            config: path of the config file.
+            pyTorch_model_file: path of the PyTorch model file, or None.
+            onnx_model_file: path of the ONNX model file, or None.
+            hubertTorchModel: path of the HuBERT model file, or None.
+
+        Returns:
+            Whatever the backend's ``loadModel`` returns.
+        """
+        if self.modelType == "MMVCv15" or self.modelType == "MMVCv13":
            return self.voiceChanger.loadModel(config, pyTorch_model_file, onnx_model_file)
+        else:  # so-vits-svc-40v2
+            return self.voiceChanger.loadModel(config, pyTorch_model_file, onnx_model_file, hubertTorchModel)

    def get_info(self):
        data = asdict(self.settings)
@ -165,15 +168,6 @@ class VoiceChanger():
            print_convert_processing(
                f" Crossfade data size: crossfade:{crossfadeSize}, crossfade setting:{self.settings.crossFadeOverlapSize}, input size:{inputSize}")

-            # if convertSize < 8192:
-            #     convertSize = 8192
-
-            # if convertSize % processing_hop_length != 0:  # モデルの出力のホップサイズで切り捨てが発生するので補う。
-            #     convertSize = convertSize + (processing_hop_length - (convertSize % processing_hop_length))
-
-            # overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
-            # cropRange = (-1 * (inputSize + overlapSize), -1 * overlapSize)
-
            print_convert_processing(f" Convert data size of {inputSize + crossfadeSize} (+ extra size)")
            print_convert_processing(f"         will be cropped:{-1 * (inputSize + crossfadeSize)}, {-1 * (crossfadeSize)}")

--- a/server/voice_changer/VoiceChangerManager.py
+++ b/server/voice_changer/VoiceChangerManager.py
@ -10,8 +10,8 @@ class VoiceChangerManager():
            cls._instance.voiceChanger = VoiceChanger()
        return cls._instance

-    def loadModel(self, config, model, onnx_model):
-        info = self.voiceChanger.loadModel(config, model, onnx_model)
+    def loadModel(self, config, model, onnx_model, hubertTorchModel):
+        info = self.voiceChanger.loadModel(config, model, onnx_model, hubertTorchModel)
        info["status"] = "OK"
        return info