WIP: integrate VCs into new GUI 4

wataru 2023-06-23 14:54:39 +09:00
parent 4b212bd442
commit 59f558ef93
14 changed files with 104 additions and 203 deletions

File diff suppressed because one or more lines are too long

@@ -1,7 +1,7 @@
import React, { useEffect, useMemo, useState } from "react";
import { useGuiState } from "./001_GuiStateProvider";
import { useAppState } from "../../001_provider/001_AppStateProvider";
import { MergeElement, RVCModelSlot, RVCModelType } from "@dannadori/voice-changer-client-js";
import { MergeElement, RVCModelSlot, RVCModelType, VoiceChangerType } from "@dannadori/voice-changer-client-js";
export const MergeLabDialog = () => {
@@ -104,10 +104,8 @@ export const MergeLabDialog = () => {
const onMergeClicked = () => {
serverSetting.mergeModel({
voiceChangerType: VoiceChangerType.RVC,
command: "mix",
defaultTune: 0,
defaultIndexRatio: 1,
defaultProtect: 0.5,
files: mergeElements
})
}

@@ -602,9 +602,7 @@ export type MergeElement = {
strength: number
}
export type MergeModelRequest = {
voiceChangerType: VoiceChangerType
command: "mix",
defaultTune: number,
defaultIndexRatio: number,
defaultProtect: number,
files: MergeElement[]
}

@@ -153,23 +153,7 @@ class AudioDataset(Dataset):
start_frame = int(idx_from / frame_resolution)
units_frame_len = int(waveform_sec / frame_resolution)
aug_flag = random.choice([True, False]) and self.use_aug
"""
audio = data_buffer.get('audio')
if audio is None:
path_audio = os.path.join(self.path_root, 'audio', name) + '.wav'
audio, sr = librosa.load(
path_audio,
sr = self.sample_rate,
offset = start_frame * frame_resolution,
duration = waveform_sec)
if len(audio.shape) > 1:
audio = librosa.to_mono(audio)
# clip audio into N seconds
audio = audio[ : audio.shape[-1] // self.hop_size * self.hop_size]
audio = torch.from_numpy(audio).float()
else:
audio = audio[start_frame * self.hop_size : (start_frame + units_frame_len) * self.hop_size]
"""
# load mel
mel_key = "aug_mel" if aug_flag else "mel"
mel = data_buffer.get(mel_key)

@@ -11,84 +11,6 @@ class NoiseScheduleVP:
continuous_beta_1=20.0,
dtype=torch.float32,
):
"""Create a wrapper class for the forward SDE (VP type).
***
Update: We support discrete-time diffusion models by implementing a piecewise linear interpolation for log_alpha_t.
We recommend using schedule='discrete' for discrete-time diffusion models, especially for high-resolution images.
***
The forward SDE ensures that the conditional distribution q_{t|0}(x_t | x_0) = N ( alpha_t * x_0, sigma_t^2 * I ).
We further define lambda_t = log(alpha_t) - log(sigma_t), which is the half-logSNR (described in the DPM-Solver paper).
Therefore, we implement the functions for computing alpha_t, sigma_t and lambda_t. For t in [0, T], we have:
log_alpha_t = self.marginal_log_mean_coeff(t)
sigma_t = self.marginal_std(t)
lambda_t = self.marginal_lambda(t)
Moreover, as lambda(t) is an invertible function, we also support its inverse function:
t = self.inverse_lambda(lambda_t)
===============================================================
We support both discrete-time DPMs (trained on n = 0, 1, ..., N-1) and continuous-time DPMs (trained on t in [t_0, T]).
1. For discrete-time DPMs:
For discrete-time DPMs trained on n = 0, 1, ..., N-1, we convert the discrete steps to continuous time steps by:
t_i = (i + 1) / N
e.g. for N = 1000, we have t_0 = 1e-3 and T = t_{N-1} = 1.
We solve the corresponding diffusion ODE from time T = 1 to time t_0 = 1e-3.
Args:
betas: A `torch.Tensor`. The beta array for the discrete-time DPM. (See the original DDPM paper for details)
alphas_cumprod: A `torch.Tensor`. The cumprod alphas for the discrete-time DPM. (See the original DDPM paper for details)
Note that we always have alphas_cumprod = cumprod(1 - betas). Therefore, we only need to set one of `betas` and `alphas_cumprod`.
**Important**: Please pay special attention to the argument `alphas_cumprod`:
The `alphas_cumprod` is the \hat{alpha_n} array in the notation of DDPM. Specifically, DDPMs assume that
q_{t_n | 0}(x_{t_n} | x_0) = N ( \sqrt{\hat{alpha_n}} * x_0, (1 - \hat{alpha_n}) * I ).
Therefore, the notation \hat{alpha_n} is different from the notation alpha_t in DPM-Solver. In fact, we have
alpha_{t_n} = \sqrt{\hat{alpha_n}},
and
log(alpha_{t_n}) = 0.5 * log(\hat{alpha_n}).
2. For continuous-time DPMs:
We support the linear VPSDE for the continuous time setting. The hyperparameters for the noise
schedule are the default settings in Yang Song's ScoreSDE:
Args:
beta_min: A `float` number. The smallest beta for the linear schedule.
beta_max: A `float` number. The largest beta for the linear schedule.
T: A `float` number. The ending time of the forward process.
===============================================================
Args:
schedule: A `str`. The noise schedule of the forward SDE. 'discrete' for discrete-time DPMs,
'linear' for continuous-time DPMs.
Returns:
A wrapper object of the forward SDE (VP type).
===============================================================
Example:
# For discrete-time DPMs, given betas (the beta array for n = 0, 1, ..., N - 1):
>>> ns = NoiseScheduleVP('discrete', betas=betas)
# For discrete-time DPMs, given alphas_cumprod (the \hat{alpha_n} array for n = 0, 1, ..., N - 1):
>>> ns = NoiseScheduleVP('discrete', alphas_cumprod=alphas_cumprod)
# For continuous-time DPMs (VPSDE), linear schedule:
>>> ns = NoiseScheduleVP('linear', continuous_beta_0=0.1, continuous_beta_1=20.)
"""
if schedule not in ["discrete", "linear"]:
raise ValueError("Unsupported noise schedule {}. The schedule needs to be 'discrete' or 'linear'".format(schedule))
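
(For reference: a minimal sketch, outside this diff, of the discrete-time quantities the removed docstring describes. The variable names and the example beta schedule are illustrative, not taken from the codebase.)

import torch

# Example beta array for a discrete-time DPM with N = 1000 steps (illustrative values).
betas = torch.linspace(1e-4, 2e-2, 1000)
alphas_cumprod = torch.cumprod(1.0 - betas, dim=0)  # \hat{alpha_n} = cumprod(1 - betas)

# log(alpha_{t_n}) = 0.5 * log(\hat{alpha_n}), as stated in the docstring above.
log_alpha = 0.5 * torch.log(alphas_cumprod)
# VP type: alpha_t^2 + sigma_t^2 = 1, so sigma_t = sqrt(1 - alphas_cumprod).
sigma = torch.sqrt(1.0 - torch.exp(2.0 * log_alpha))
# Half-logSNR lambda_t = log(alpha_t) - log(sigma_t).
lam = log_alpha - torch.log(sigma)

# Discrete steps mapped to continuous time: t_i = (i + 1) / N, so t_0 = 1e-3, T = 1.
t = torch.arange(1, len(betas) + 1) / len(betas)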

@@ -197,8 +197,7 @@ class RVC(VoiceChangerModel):
pass
def export2onnx(self):
allModelSlots = self.modelSlotManager.getAllSlotInfo()
modelSlot = allModelSlots[self.settings.modelSlotIndex]
modelSlot = self.slotInfo
if modelSlot.isONNX:
print("[Voice Changer] export2onnx, No pyTorch filepath.")

@@ -0,0 +1,36 @@
import os
import torch
from const import UPLOAD_DIR
from voice_changer.RVC.modelMerger.MergeModel import merge_model
from voice_changer.utils.ModelMerger import ModelMerger, ModelMergerRequest
class RVCModelMerger(ModelMerger):
@classmethod
def merge_models(cls, request: ModelMergerRequest, storeSlot: int):
print("[Voice Changer] MergeRequest:", request)
merged = merge_model(request)
# For now, store the merged model in the upload folder (for historical reasons).
# A subsequent call to loadModel moves it to the persistent model folder.
storeDir = os.path.join(UPLOAD_DIR, f"{storeSlot}")
print("[Voice Changer] store merged model to:", storeDir)
os.makedirs(storeDir, exist_ok=True)
storeFile = os.path.join(storeDir, "merged.pth")
torch.save(merged, storeFile)
return storeFile
# # Call loadModel to move the model to the persistent model folder.
# params = {
# "defaultTune": req.defaultTune,
# "defaultIndexRatio": req.defaultIndexRatio,
# "defaultProtect": req.defaultProtect,
# "sampleId": "",
# "files": {"rvcModel": storeFile},
# }
# props: LoadModelParams = LoadModelParams(slot=targetSlot, isHalf=True, params=params)
# self.loadModel(props)
# self.prepareModel(targetSlot)
# self.settings.modelSlotIndex = targetSlot
# self.currentSlot = self.settings.modelSlotIndex

@@ -1,10 +1,12 @@
from typing import Dict, Any
from voice_changer.RVC.modelMerger.MergeModelRequest import MergeModelRequest
from collections import OrderedDict
import torch
from voice_changer.utils.ModelMerger import ModelMergerRequest
def merge_model(request: MergeModelRequest):
def merge_model(request: ModelMergerRequest):
def extract(ckpt: Dict[str, Any]):
a = ckpt["model"]
opt: Dict[str, Any] = OrderedDict()
@@ -61,13 +63,7 @@ def merge_model(request: MergeModelRequest):
merged["sr"] = state_dict["sr"]
merged["f0"] = state_dict["f0"]
merged["info"] = state_dict["info"]
merged["embedder_name"] = (
state_dict["embedder_name"] if "embedder_name" in state_dict else None
)
merged["embedder_output_layer"] = (
state_dict["embedder_output_layer"]
if "embedder_output_layer" in state_dict
else None
)
merged["embedder_name"] = state_dict["embedder_name"] if "embedder_name" in state_dict else None
merged["embedder_output_layer"] = state_dict["embedder_output_layer"] if "embedder_output_layer" in state_dict else None
print("write metadata done.")
return merged

@@ -1,21 +0,0 @@
from dataclasses import dataclass, field
from typing import List
from dataclasses_json import dataclass_json
@dataclass_json
@dataclass
class MergeFile:
filename: str
strength: int
@dataclass_json
@dataclass
class MergeModelRequest:
command: str = ""
slot: int = -1
defaultTune: int = 0
defaultIndexRatio: int = 1
defaultProtect: float = 0.5
files: List[MergeFile] = field(default_factory=lambda: [])

@@ -0,0 +1,21 @@
# from dataclasses import dataclass, field
# from typing import List
# from dataclasses_json import dataclass_json
# @dataclass_json
# @dataclass
# class MergeFile:
# filename: str
# strength: int
# @dataclass_json
# @dataclass
# class MergeModelRequest:
# command: str = ""
# slot: int = -1
# defaultTune: int = 0
# defaultIndexRatio: int = 1
# defaultProtect: float = 0.5
# files: List[MergeFile] = field(default_factory=lambda: [])

@@ -83,49 +83,6 @@ class VoiceChanger:
def setModel(self, model: Any):
self.voiceChanger = model
# def switchModelType(self, modelType: ModelType):
# print("Switch Model Type:", modelType)
# try:
# if self.voiceChanger is not None:
# # return {"status": "ERROR", "msg": "vc is already selected. currently re-select is not implemented"}
# del self.voiceChanger
# self.voiceChanger = None
# self.modelType = modelType
# if self.modelType == "MMVCv15":
# from voice_changer.MMVCv15.MMVCv15 import MMVCv15
# self.voiceChanger = MMVCv15() # type: ignore
# elif self.modelType == "MMVCv13":
# from voice_changer.MMVCv13.MMVCv13 import MMVCv13
# self.voiceChanger = MMVCv13()
# elif self.modelType == "so-vits-svc-40v2":
# from voice_changer.SoVitsSvc40v2.SoVitsSvc40v2 import SoVitsSvc40v2
# self.voiceChanger = SoVitsSvc40v2(self.params)
# elif self.modelType == "so-vits-svc-40" or self.modelType == "so-vits-svc-40_c":
# from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
# self.voiceChanger = SoVitsSvc40(self.params)
# elif self.modelType == "DDSP-SVC":
# from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
# self.voiceChanger = DDSP_SVC(self.params)
# elif self.modelType == "RVC":
# from voice_changer.RVC.RVC import RVC
# self.voiceChanger = RVC(self.params)
# else:
# from voice_changer.MMVCv13.MMVCv13 import MMVCv13
# self.voiceChanger = MMVCv13()
# except Exception as e:
# print(e)
# print(traceback.format_exc())
# print("Switch Model Type:", self.voiceChanger)
# return {"status": "OK", "msg": "vc is switched."}
def getModelType(self):
if self.modelType is not None:
return {"status": "OK", "vc": self.modelType}
@@ -384,27 +341,6 @@ class VoiceChanger:
self.voiceChanger.merge_models(request)
return self.get_info()
def update_model_default(self):
if self.voiceChanger is None:
print("[Voice Changer] Voice Changer is not selected.")
return
self.voiceChanger.update_model_default()
return self.get_info()
def update_model_info(self, newData: str):
if self.voiceChanger is None:
print("[Voice Changer] Voice Changer is not selected.")
return
self.voiceChanger.update_model_info(newData)
return self.get_info()
def upload_model_assets(self, params: str):
if self.voiceChanger is None:
print("[Voice Changer] Voice Changer is not selected.")
return
self.voiceChanger.upload_model_assets(params)
return self.get_info()
PRINT_CONVERT_PROCESSING: bool = False
# PRINT_CONVERT_PROCESSING = True

@@ -5,9 +5,11 @@ import numpy as np
from downloader.SampleDownloader import downloadSample, getSampleInfos
from voice_changer.Local.ServerDevice import ServerDevice, ServerDeviceCallbacks
from voice_changer.ModelSlotManager import ModelSlotManager
from voice_changer.RVC.RVCModelMerger import RVCModelMerger
from voice_changer.VoiceChanger import VoiceChanger
from const import UPLOAD_DIR, ModelType
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.LoadModelParams import LoadModelParamFile, LoadModelParams
from voice_changer.utils.ModelMerger import MergeElement, ModelMergerRequest
from voice_changer.utils.VoiceChangerModel import AudioInOut
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from dataclasses import dataclass, asdict, field
@@ -240,7 +242,15 @@ class VoiceChangerManager(ServerDeviceCallbacks):
return self.voiceChanger.export2onnx()
def merge_models(self, request: str):
self.voiceChanger.merge_models(request)
# self.voiceChanger.merge_models(request)
req = json.loads(request)
req = ModelMergerRequest(**req)
req.files = [MergeElement(**f) for f in req.files]
slot = len(self.modelSlotManager.getAllSlotInfo()) - 1
if req.voiceChangerType == "RVC":
merged = RVCModelMerger.merge_models(req, slot)
loadParam = LoadModelParams(voiceChangerType="RVC", slot=slot, isSampleMode=False, sampleId="", files=[LoadModelParamFile(name=os.path.basename(merged), kind="rvcModel", dir=f"{slot}")], params={})
self.loadModel(loadParam)
return self.get_info()
def setEmitTo(self, emitTo: Callable[[Any], None]):
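
(For reference: a hedged sketch of the JSON payload this handler expects. The keys mirror ModelMergerRequest and MergeElement from voice_changer.utils.ModelMerger below; the filenames and strength values are made up.)

import json

# Hypothetical merge request; each "files" entry follows MergeElement (filename, strength).
request = json.dumps({
    "voiceChangerType": "RVC",
    "command": "mix",
    "files": [
        {"filename": "model_a.pth", "strength": 70},  # invented filenames
        {"filename": "model_b.pth", "strength": 30},
    ],
})
# manager.merge_models(request)  # where `manager` is a VoiceChangerManager instance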

@@ -0,0 +1,22 @@
from typing import Protocol
from const import VoiceChangerType
from dataclasses import dataclass
@dataclass
class MergeElement:
filename: str
strength: int
@dataclass
class ModelMergerRequest:
voiceChangerType: VoiceChangerType
command: str
files: list[MergeElement]
class ModelMerger(Protocol):
@classmethod
def merge_models(cls, request: ModelMergerRequest, storeSlot: int):
...
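
(For reference: ModelMerger is a structural Protocol, so any class exposing a matching classmethod conforms without inheriting from it. A hypothetical sketch for another backend; the class name and paths are invented for illustration.)

class DDSPModelMerger:  # hypothetical example, not part of this commit
    @classmethod
    def merge_models(cls, request: ModelMergerRequest, storeSlot: int) -> str:
        # Merge request.files according to their strengths and persist the result,
        # then return the path of the stored file, as RVCModelMerger does.
        ...
        return f"upload_dir/{storeSlot}/merged.pth"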

@@ -5,5 +5,5 @@ from voice_changer.utils.LoadModelParams import LoadModelParams
class ModelSlotGenerator(Protocol):
@classmethod
def loadModel(self, params: LoadModelParams):
def loadModel(cls, params: LoadModelParams):
...