mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-01-23 13:35:12 +03:00
WIP: integrate vcs to new gui 4
This commit is contained in:
parent
4b212bd442
commit
59f558ef93
2
client/demo/dist/index.js
vendored
2
client/demo/dist/index.js
vendored
File diff suppressed because one or more lines are too long
@ -1,7 +1,7 @@
|
||||
import React, { useEffect, useMemo, useState } from "react";
|
||||
import { useGuiState } from "./001_GuiStateProvider";
|
||||
import { useAppState } from "../../001_provider/001_AppStateProvider";
|
||||
import { MergeElement, RVCModelSlot, RVCModelType } from "@dannadori/voice-changer-client-js";
|
||||
import { MergeElement, RVCModelSlot, RVCModelType, VoiceChangerType } from "@dannadori/voice-changer-client-js";
|
||||
|
||||
|
||||
export const MergeLabDialog = () => {
|
||||
@ -104,10 +104,8 @@ export const MergeLabDialog = () => {
|
||||
|
||||
const onMergeClicked = () => {
|
||||
serverSetting.mergeModel({
|
||||
voiceChangerType: VoiceChangerType.RVC,
|
||||
command: "mix",
|
||||
defaultTune: 0,
|
||||
defaultIndexRatio: 1,
|
||||
defaultProtect: 0.5,
|
||||
files: mergeElements
|
||||
})
|
||||
}
|
||||
|
@ -602,9 +602,7 @@ export type MergeElement = {
|
||||
strength: number
|
||||
}
|
||||
export type MergeModelRequest = {
|
||||
voiceChangerType: VoiceChangerType
|
||||
command: "mix",
|
||||
defaultTune: number,
|
||||
defaultIndexRatio: number,
|
||||
defaultProtect: number,
|
||||
files: MergeElement[]
|
||||
}
|
||||
|
@ -153,23 +153,7 @@ class AudioDataset(Dataset):
|
||||
start_frame = int(idx_from / frame_resolution)
|
||||
units_frame_len = int(waveform_sec / frame_resolution)
|
||||
aug_flag = random.choice([True, False]) and self.use_aug
|
||||
"""
|
||||
audio = data_buffer.get('audio')
|
||||
if audio is None:
|
||||
path_audio = os.path.join(self.path_root, 'audio', name) + '.wav'
|
||||
audio, sr = librosa.load(
|
||||
path_audio,
|
||||
sr = self.sample_rate,
|
||||
offset = start_frame * frame_resolution,
|
||||
duration = waveform_sec)
|
||||
if len(audio.shape) > 1:
|
||||
audio = librosa.to_mono(audio)
|
||||
# clip audio into N seconds
|
||||
audio = audio[ : audio.shape[-1] // self.hop_size * self.hop_size]
|
||||
audio = torch.from_numpy(audio).float()
|
||||
else:
|
||||
audio = audio[start_frame * self.hop_size : (start_frame + units_frame_len) * self.hop_size]
|
||||
"""
|
||||
|
||||
# load mel
|
||||
mel_key = "aug_mel" if aug_flag else "mel"
|
||||
mel = data_buffer.get(mel_key)
|
||||
|
@ -11,84 +11,6 @@ class NoiseScheduleVP:
|
||||
continuous_beta_1=20.0,
|
||||
dtype=torch.float32,
|
||||
):
|
||||
"""Create a wrapper class for the forward SDE (VP type).
|
||||
|
||||
***
|
||||
Update: We support discrete-time diffusion models by implementing a picewise linear interpolation for log_alpha_t.
|
||||
We recommend to use schedule='discrete' for the discrete-time diffusion models, especially for high-resolution images.
|
||||
***
|
||||
|
||||
The forward SDE ensures that the condition distribution q_{t|0}(x_t | x_0) = N ( alpha_t * x_0, sigma_t^2 * I ).
|
||||
We further define lambda_t = log(alpha_t) - log(sigma_t), which is the half-logSNR (described in the DPM-Solver paper).
|
||||
Therefore, we implement the functions for computing alpha_t, sigma_t and lambda_t. For t in [0, T], we have:
|
||||
|
||||
log_alpha_t = self.marginal_log_mean_coeff(t)
|
||||
sigma_t = self.marginal_std(t)
|
||||
lambda_t = self.marginal_lambda(t)
|
||||
|
||||
Moreover, as lambda(t) is an invertible function, we also support its inverse function:
|
||||
|
||||
t = self.inverse_lambda(lambda_t)
|
||||
|
||||
===============================================================
|
||||
|
||||
We support both discrete-time DPMs (trained on n = 0, 1, ..., N-1) and continuous-time DPMs (trained on t in [t_0, T]).
|
||||
|
||||
1. For discrete-time DPMs:
|
||||
|
||||
For discrete-time DPMs trained on n = 0, 1, ..., N-1, we convert the discrete steps to continuous time steps by:
|
||||
t_i = (i + 1) / N
|
||||
e.g. for N = 1000, we have t_0 = 1e-3 and T = t_{N-1} = 1.
|
||||
We solve the corresponding diffusion ODE from time T = 1 to time t_0 = 1e-3.
|
||||
|
||||
Args:
|
||||
betas: A `torch.Tensor`. The beta array for the discrete-time DPM. (See the original DDPM paper for details)
|
||||
alphas_cumprod: A `torch.Tensor`. The cumprod alphas for the discrete-time DPM. (See the original DDPM paper for details)
|
||||
|
||||
Note that we always have alphas_cumprod = cumprod(1 - betas). Therefore, we only need to set one of `betas` and `alphas_cumprod`.
|
||||
|
||||
**Important**: Please pay special attention for the args for `alphas_cumprod`:
|
||||
The `alphas_cumprod` is the \hat{alpha_n} arrays in the notations of DDPM. Specifically, DDPMs assume that
|
||||
q_{t_n | 0}(x_{t_n} | x_0) = N ( \sqrt{\hat{alpha_n}} * x_0, (1 - \hat{alpha_n}) * I ).
|
||||
Therefore, the notation \hat{alpha_n} is different from the notation alpha_t in DPM-Solver. In fact, we have
|
||||
alpha_{t_n} = \sqrt{\hat{alpha_n}},
|
||||
and
|
||||
log(alpha_{t_n}) = 0.5 * log(\hat{alpha_n}).
|
||||
|
||||
|
||||
2. For continuous-time DPMs:
|
||||
|
||||
We support the linear VPSDE for the continuous time setting. The hyperparameters for the noise
|
||||
schedule are the default settings in Yang Song's ScoreSDE:
|
||||
|
||||
Args:
|
||||
beta_min: A `float` number. The smallest beta for the linear schedule.
|
||||
beta_max: A `float` number. The largest beta for the linear schedule.
|
||||
T: A `float` number. The ending time of the forward process.
|
||||
|
||||
===============================================================
|
||||
|
||||
Args:
|
||||
schedule: A `str`. The noise schedule of the forward SDE. 'discrete' for discrete-time DPMs,
|
||||
'linear' for continuous-time DPMs.
|
||||
Returns:
|
||||
A wrapper object of the forward SDE (VP type).
|
||||
|
||||
===============================================================
|
||||
|
||||
Example:
|
||||
|
||||
# For discrete-time DPMs, given betas (the beta array for n = 0, 1, ..., N - 1):
|
||||
>>> ns = NoiseScheduleVP('discrete', betas=betas)
|
||||
|
||||
# For discrete-time DPMs, given alphas_cumprod (the \hat{alpha_n} array for n = 0, 1, ..., N - 1):
|
||||
>>> ns = NoiseScheduleVP('discrete', alphas_cumprod=alphas_cumprod)
|
||||
|
||||
# For continuous-time DPMs (VPSDE), linear schedule:
|
||||
>>> ns = NoiseScheduleVP('linear', continuous_beta_0=0.1, continuous_beta_1=20.)
|
||||
|
||||
"""
|
||||
|
||||
if schedule not in ["discrete", "linear"]:
|
||||
raise ValueError("Unsupported noise schedule {}. The schedule needs to be 'discrete' or 'linear'".format(schedule))
|
||||
|
||||
|
@ -197,8 +197,7 @@ class RVC(VoiceChangerModel):
|
||||
pass
|
||||
|
||||
def export2onnx(self):
|
||||
allModelSlots = self.modelSlotManager.getAllSlotInfo()
|
||||
modelSlot = allModelSlots[self.settings.modelSlotIndex]
|
||||
modelSlot = self.slotInfo
|
||||
|
||||
if modelSlot.isONNX:
|
||||
print("[Voice Changer] export2onnx, No pyTorch filepath.")
|
||||
|
36
server/voice_changer/RVC/RVCModelMerger.py
Normal file
36
server/voice_changer/RVC/RVCModelMerger.py
Normal file
@ -0,0 +1,36 @@
|
||||
import os
|
||||
|
||||
import torch
|
||||
from const import UPLOAD_DIR
|
||||
from voice_changer.RVC.modelMerger.MergeModel import merge_model
|
||||
from voice_changer.utils.ModelMerger import ModelMerger, ModelMergerRequest
|
||||
|
||||
|
||||
class RVCModelMerger(ModelMerger):
    """Merger for RVC models: runs ``merge_model`` and saves the result to disk."""

    @classmethod
    def merge_models(cls, request: ModelMergerRequest, storeSlot: int):
        """Merge the models named in ``request`` and save the merged result.

        The result is written with ``torch.save`` to
        ``<UPLOAD_DIR>/<storeSlot>/merged.pth``; the directory is created
        when it does not exist yet.

        Args:
            request: Merge request, passed unchanged to ``merge_model``.
            storeSlot: Slot number, used as the sub-directory name under
                ``UPLOAD_DIR``.

        Returns:
            Path of the saved ``merged.pth`` file.
        """
        print("[Voice Changer] MergeRequest:", request)
        merged_state = merge_model(request)

        # For now the result is staged in the upload folder (historical reasons).
        # A subsequent loadModel call moves it into the persistent model folder.
        slot_dir = os.path.join(UPLOAD_DIR, str(storeSlot))
        print("[Voice Changer] store merged model to:", slot_dir)
        os.makedirs(slot_dir, exist_ok=True)

        merged_path = os.path.join(slot_dir, "merged.pth")
        torch.save(merged_state, merged_path)
        return merged_path
|
||||
|
||||
# # loadmodelを呼び出して永続化モデルフォルダに移動させる。
|
||||
# params = {
|
||||
# "defaultTune": req.defaultTune,
|
||||
# "defaultIndexRatio": req.defaultIndexRatio,
|
||||
# "defaultProtect": req.defaultProtect,
|
||||
# "sampleId": "",
|
||||
# "files": {"rvcModel": storeFile},
|
||||
# }
|
||||
# props: LoadModelParams = LoadModelParams(slot=targetSlot, isHalf=True, params=params)
|
||||
# self.loadModel(props)
|
||||
# self.prepareModel(targetSlot)
|
||||
# self.settings.modelSlotIndex = targetSlot
|
||||
# self.currentSlot = self.settings.modelSlotIndex
|
@ -1,10 +1,12 @@
|
||||
from typing import Dict, Any
|
||||
from voice_changer.RVC.modelMerger.MergeModelRequest import MergeModelRequest
|
||||
|
||||
from collections import OrderedDict
|
||||
import torch
|
||||
|
||||
from voice_changer.utils.ModelMerger import ModelMergerRequest
|
||||
|
||||
def merge_model(request: MergeModelRequest):
|
||||
|
||||
def merge_model(request: ModelMergerRequest):
|
||||
def extract(ckpt: Dict[str, Any]):
|
||||
a = ckpt["model"]
|
||||
opt: Dict[str, Any] = OrderedDict()
|
||||
@ -61,13 +63,7 @@ def merge_model(request: MergeModelRequest):
|
||||
merged["sr"] = state_dict["sr"]
|
||||
merged["f0"] = state_dict["f0"]
|
||||
merged["info"] = state_dict["info"]
|
||||
merged["embedder_name"] = (
|
||||
state_dict["embedder_name"] if "embedder_name" in state_dict else None
|
||||
)
|
||||
merged["embedder_output_layer"] = (
|
||||
state_dict["embedder_output_layer"]
|
||||
if "embedder_output_layer" in state_dict
|
||||
else None
|
||||
)
|
||||
merged["embedder_name"] = state_dict["embedder_name"] if "embedder_name" in state_dict else None
|
||||
merged["embedder_output_layer"] = state_dict["embedder_output_layer"] if "embedder_output_layer" in state_dict else None
|
||||
print("write metadata done.")
|
||||
return merged
|
||||
|
@ -1,21 +0,0 @@
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List
|
||||
from dataclasses_json import dataclass_json
|
||||
|
||||
|
||||
@dataclass_json
|
||||
@dataclass
|
||||
class MergeFile:
|
||||
filename: str
|
||||
strength: int
|
||||
|
||||
|
||||
@dataclass_json
|
||||
@dataclass
|
||||
class MergeModelRequest:
|
||||
command: str = ""
|
||||
slot: int = -1
|
||||
defaultTune: int = 0
|
||||
defaultIndexRatio: int = 1
|
||||
defaultProtect: float = 0.5
|
||||
files: List[MergeFile] = field(default_factory=lambda: [])
|
21
server/voice_changer/RVC/modelMerger/MergeModelRequest_.py
Normal file
21
server/voice_changer/RVC/modelMerger/MergeModelRequest_.py
Normal file
@ -0,0 +1,21 @@
|
||||
# from dataclasses import dataclass, field
|
||||
# from typing import List
|
||||
# from dataclasses_json import dataclass_json
|
||||
|
||||
|
||||
# @dataclass_json
|
||||
# @dataclass
|
||||
# class MergeFile:
|
||||
# filename: str
|
||||
# strength: int
|
||||
|
||||
|
||||
# @dataclass_json
|
||||
# @dataclass
|
||||
# class MergeModelRequest:
|
||||
# command: str = ""
|
||||
# slot: int = -1
|
||||
# defaultTune: int = 0
|
||||
# defaultIndexRatio: int = 1
|
||||
# defaultProtect: float = 0.5
|
||||
# files: List[MergeFile] = field(default_factory=lambda: [])
|
@ -83,49 +83,6 @@ class VoiceChanger:
|
||||
def setModel(self, model: Any):
    """Store ``model`` as this instance's ``voiceChanger``.

    Args:
        model: The object to keep on ``self.voiceChanger``.
    """
    self.voiceChanger = model
|
||||
|
||||
# def switchModelType(self, modelType: ModelType):
|
||||
# print("Switch Model Type:", modelType)
|
||||
# try:
|
||||
# if self.voiceChanger is not None:
|
||||
# # return {"status": "ERROR", "msg": "vc is already selected. currently re-select is not implemented"}
|
||||
# del self.voiceChanger
|
||||
# self.voiceChanger = None
|
||||
|
||||
# self.modelType = modelType
|
||||
# if self.modelType == "MMVCv15":
|
||||
# from voice_changer.MMVCv15.MMVCv15 import MMVCv15
|
||||
|
||||
# self.voiceChanger = MMVCv15() # type: ignore
|
||||
# elif self.modelType == "MMVCv13":
|
||||
# from voice_changer.MMVCv13.MMVCv13 import MMVCv13
|
||||
|
||||
# self.voiceChanger = MMVCv13()
|
||||
# elif self.modelType == "so-vits-svc-40v2":
|
||||
# from voice_changer.SoVitsSvc40v2.SoVitsSvc40v2 import SoVitsSvc40v2
|
||||
|
||||
# self.voiceChanger = SoVitsSvc40v2(self.params)
|
||||
# elif self.modelType == "so-vits-svc-40" or self.modelType == "so-vits-svc-40_c":
|
||||
# from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
|
||||
|
||||
# self.voiceChanger = SoVitsSvc40(self.params)
|
||||
# elif self.modelType == "DDSP-SVC":
|
||||
# from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
|
||||
|
||||
# self.voiceChanger = DDSP_SVC(self.params)
|
||||
# elif self.modelType == "RVC":
|
||||
# from voice_changer.RVC.RVC import RVC
|
||||
|
||||
# self.voiceChanger = RVC(self.params)
|
||||
# else:
|
||||
# from voice_changer.MMVCv13.MMVCv13 import MMVCv13
|
||||
|
||||
# self.voiceChanger = MMVCv13()
|
||||
# except Exception as e:
|
||||
# print(e)
|
||||
# print(traceback.format_exc())
|
||||
# print("Switch Model Type:", self.voiceChanger)
|
||||
# return {"status": "OK", "msg": "vc is switched."}
|
||||
|
||||
def getModelType(self):
|
||||
if self.modelType is not None:
|
||||
return {"status": "OK", "vc": self.modelType}
|
||||
@ -384,27 +341,6 @@ class VoiceChanger:
|
||||
self.voiceChanger.merge_models(request)
|
||||
return self.get_info()
|
||||
|
||||
def update_model_default(self):
|
||||
if self.voiceChanger is None:
|
||||
print("[Voice Changer] Voice Changer is not selected.")
|
||||
return
|
||||
self.voiceChanger.update_model_default()
|
||||
return self.get_info()
|
||||
|
||||
def update_model_info(self, newData: str):
|
||||
if self.voiceChanger is None:
|
||||
print("[Voice Changer] Voice Changer is not selected.")
|
||||
return
|
||||
self.voiceChanger.update_model_info(newData)
|
||||
return self.get_info()
|
||||
|
||||
def upload_model_assets(self, params: str):
|
||||
if self.voiceChanger is None:
|
||||
print("[Voice Changer] Voice Changer is not selected.")
|
||||
return
|
||||
self.voiceChanger.upload_model_assets(params)
|
||||
return self.get_info()
|
||||
|
||||
|
||||
PRINT_CONVERT_PROCESSING: bool = False
|
||||
# PRINT_CONVERT_PROCESSING = True
|
||||
|
@ -5,9 +5,11 @@ import numpy as np
|
||||
from downloader.SampleDownloader import downloadSample, getSampleInfos
|
||||
from voice_changer.Local.ServerDevice import ServerDevice, ServerDeviceCallbacks
|
||||
from voice_changer.ModelSlotManager import ModelSlotManager
|
||||
from voice_changer.RVC.RVCModelMerger import RVCModelMerger
|
||||
from voice_changer.VoiceChanger import VoiceChanger
|
||||
from const import UPLOAD_DIR, ModelType
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParamFile, LoadModelParams
|
||||
from voice_changer.utils.ModelMerger import MergeElement, ModelMergerRequest
|
||||
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||
from dataclasses import dataclass, asdict, field
|
||||
@ -240,7 +242,15 @@ class VoiceChangerManager(ServerDeviceCallbacks):
|
||||
return self.voiceChanger.export2onnx()
|
||||
|
||||
def merge_models(self, request: str):
    """Merge models described by a JSON request and load the result into a slot.

    Args:
        request: JSON text whose keys match the ``ModelMergerRequest`` fields.
            Each entry of ``files`` must match the ``MergeElement`` fields.

    Returns:
        The value of ``self.get_info()``.

    Raises:
        json.JSONDecodeError: If ``request`` is not valid JSON.
        TypeError: If the decoded keys do not match the dataclass fields.
    """
    # Merging is done here by the per-type merger class; the request is no
    # longer delegated to self.voiceChanger.merge_models.
    payload = json.loads(request)
    req = ModelMergerRequest(**payload)
    # json.loads yields plain dicts for the file entries; convert them.
    req.files = [MergeElement(**f) for f in req.files]

    # The merged model goes into the last index of the slot list.
    slot = len(self.modelSlotManager.getAllSlotInfo()) - 1
    if req.voiceChangerType == "RVC":
        merged = RVCModelMerger.merge_models(req, slot)
        loadParam = LoadModelParams(
            voiceChangerType="RVC",
            slot=slot,
            isSampleMode=False,
            sampleId="",
            files=[
                LoadModelParamFile(
                    name=os.path.basename(merged),
                    kind="rvcModel",
                    dir=f"{slot}",
                )
            ],
            params={},
        )
        self.loadModel(loadParam)
    else:
        # Make the no-op visible instead of silently ignoring the request.
        print("[Voice Changer] merge_models: unsupported voiceChangerType:", req.voiceChangerType)
    return self.get_info()
|
||||
|
||||
def setEmitTo(self, emitTo: Callable[[Any], None]):
|
||||
|
22
server/voice_changer/utils/ModelMerger.py
Normal file
22
server/voice_changer/utils/ModelMerger.py
Normal file
@ -0,0 +1,22 @@
|
||||
from typing import Protocol
|
||||
from const import VoiceChangerType
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class MergeElement:
    """One entry of a model merge request."""

    # Name of the model file taking part in the merge.
    filename: str
    # Merge strength for this file — presumably its relative weight; confirm
    # against merge_model.
    strength: int
|
||||
|
||||
|
||||
@dataclass
class ModelMergerRequest:
    """Request describing a model merge for one voice changer type."""

    # Voice changer type the merge is for; selects which merger handles it.
    voiceChangerType: VoiceChangerType
    # Merge command; the GUI client sends "mix".
    command: str
    # The models to merge.
    files: list[MergeElement]
|
||||
|
||||
|
||||
class ModelMerger(Protocol):
    """Structural interface for per-voice-changer-type model mergers."""

    @classmethod
    def merge_models(cls, request: ModelMergerRequest, storeSlot: int) -> str:
        """Merge the models described by ``request`` and store the result.

        The signature matches how implementers are invoked:
        ``Merger.merge_models(request, slot)``.

        Args:
            request: The merge request.
            storeSlot: Slot number the merged model is stored under.

        Returns:
            Path of the stored merged model file.
        """
        ...
|
@ -5,5 +5,5 @@ from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||
|
||||
class ModelSlotGenerator(Protocol):
|
||||
@classmethod
|
||||
def loadModel(self, params: LoadModelParams):
|
||||
def loadModel(cls, params: LoadModelParams):
|
||||
...
|
||||
|
Loading…
Reference in New Issue
Block a user