This commit is contained in:
wataru 2022-10-30 00:58:21 +09:00
parent 9d5c714526
commit c01b7331b5
9 changed files with 760 additions and 39 deletions

280
demo/MMVCServerSIO.py Executable file
View File

@ -0,0 +1,280 @@
import sys, os, struct, argparse, logging, shutil, base64, traceback
sys.path.append("/MMVC_Trainer")
sys.path.append("/MMVC_Trainer/text")
import uvicorn
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from pydantic import BaseModel
from scipy.io.wavfile import write, read
import socketio
from distutils.util import strtobool
from datetime import datetime
import torch
import numpy as np
from mods.ssl import create_self_signed_cert
from mods.VoiceChanger import VoiceChanger
class UvicornSuppressFilter(logging.Filter):
    """Logging filter that rejects every record passed to it."""

    def filter(self, record):
        # A falsy return value tells the logging machinery to drop the record.
        return False
# Attach the reject-everything filter to the "uvicorn.error" logger.
logger = logging.getLogger("uvicorn.error")
logger.addFilter(UvicornSuppressFilter())
# logger.propagate = False
# Stop "multipart.multipart" records from propagating to ancestor loggers.
logger = logging.getLogger("multipart.multipart")
logger.propagate = False
class VoiceModel(BaseModel):
    """JSON request body accepted by the POST "/test" endpoint."""
    # Device selector forwarded to the voice changer; a negative value selects the CPU.
    gpu: int
    # Speaker id of the incoming (source) voice.
    srcId: int
    # Speaker id of the target voice to convert to.
    dstId: int
    # Caller-supplied timestamp; echoed back unchanged in the response.
    timestamp: int
    # Base64 text; once decoded it is unpacked as little-endian 16-bit samples.
    buffer: str
class MyCustomNamespace(socketio.AsyncNamespace):
    """Socket.IO namespace that runs incoming audio through a VoiceChanger.

    The model is optional at construction time; it is attached later through
    ``loadModel`` and replaced whenever ``loadModel`` is called again.
    """

    def __init__(self, namespace):
        super().__init__(namespace)

    def loadModel(self, config, model):
        """Create a VoiceChanger from ``config``/``model``, replacing any previous one."""
        # Release the previous model first so two models are never held at once.
        if hasattr(self, 'voiceChanger'):
            self.voiceChanger.destroy()
        self.voiceChanger = VoiceChanger(config, model)

    def changeVoice(self, gpu, srcId, dstId, timestamp, unpackedData):
        """Convert ``unpackedData`` with the loaded model and return the result.

        Raises:
            RuntimeError: if no model has been loaded yet.
        """
        if not hasattr(self, 'voiceChanger'):
            # The server may start without a model, so give a readable error
            # instead of an AttributeError on a missing attribute.
            raise RuntimeError("Voice changer model is not loaded yet.")
        return self.voiceChanger.on_request(gpu, srcId, dstId, timestamp, unpackedData)

    def on_connect(self, sid, environ):
        # print('[{}] connet sid : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S') , sid))
        pass

    async def on_request_message(self, sid, msg):
        """Handle a ``request_message`` event.

        ``msg`` is indexed as ``[gpu, srcId, dstId, timestamp, data]`` where
        ``data`` holds little-endian 16-bit samples. The converted audio is
        emitted as a ``response`` event carrying ``[timestamp, bytes]``.
        """
        # print("on_request_message", torch.cuda.memory_allocated())
        gpu = int(msg[0])
        srcId = int(msg[1])
        dstId = int(msg[2])
        timestamp = int(msg[3])
        data = msg[4]
        # print(srcId, dstId, timestamp)

        # Decode the whole buffer in one call instead of unpacking sample by
        # sample; a trailing odd byte (an incomplete sample) is ignored.
        sample_count = len(data) // 2
        unpackedData = np.frombuffer(data, dtype='<i2', count=sample_count)

        audio1 = self.changeVoice(gpu, srcId, dstId, timestamp, unpackedData)

        # Serialise back to little-endian 16-bit PCM in one call.
        payload = np.asarray(audio1, dtype='<i2').tobytes()
        await self.emit('response', [timestamp, payload])

    def on_disconnect(self, sid):
        # print('[{}] disconnect'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        pass
def _str_to_bool(value):
    """Convert a truth-value string to 1 or 0.

    Accepts the same spellings as the removed ``distutils.util.strtobool``
    (case-insensitive): y/yes/t/true/on/1 and n/no/f/false/off/0.
    """
    normalized = value.strip().lower()
    if normalized in ("y", "yes", "t", "true", "on", "1"):
        return 1
    if normalized in ("n", "no", "f", "false", "off", "0"):
        return 0
    raise argparse.ArgumentTypeError(f"invalid truth value {value!r}")


def setupArgParser():
    """Build the command-line parser for the server.

    Returns:
        argparse.ArgumentParser with the port (-p), config path (-c), model
        path (-m) and HTTPS options (--https, --httpsKey, --httpsCert,
        --httpsSelfSigned).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", type=int, default=8080, help="port")
    parser.add_argument("-c", type=str, help="path for the config.json")
    parser.add_argument("-m", type=str, help="path for the model file")
    # Boolean flags are parsed locally rather than with distutils' strtobool,
    # because distutils is deprecated and no longer ships with Python 3.12+.
    parser.add_argument("--https", type=_str_to_bool, default=False, help="use https")
    parser.add_argument("--httpsKey", type=str, default="ssl.key", help="path for the key of https")
    parser.add_argument("--httpsCert", type=str, default="ssl.cert", help="path for the cert of https")
    parser.add_argument("--httpsSelfSigned", type=_str_to_bool, default=True, help="generate self-signed certificate")
    return parser
def printMessage(message, level=0):
    """Print ``message`` wrapped in an ANSI escape sequence chosen by ``level``.

    Levels 0, 1 and 2 each have their own escape prefix; any other level uses
    the fallback prefix. The output always ends with the reset sequence.
    """
    prefix_by_level = {
        0: "\033[17m",
        1: "\033[34m ",
        2: "\033[32m ",
    }
    prefix = prefix_by_level.get(level, "\033[47m ")
    print(f"{prefix}{message}\033[0m")
printMessage(f"Phase name:{__name__}", level=2)
thisFilename = os.path.basename(__file__)[:-3]

# The launcher at the bottom of this file hands uvicorn the import string
# "<file name without .py>:app_socketio". This branch therefore runs when the
# module is imported under its own file name, and it must define `app_socketio`.
if __name__ == thisFilename:
    printMessage(f"PHASE3:{__name__}", level=2)
    parser = setupArgParser()
    args = parser.parse_args()
    PORT = args.p
    CONFIG = args.c
    MODEL = args.m

    app_fastapi = FastAPI()
    sio = socketio.AsyncServer(
        async_mode='asgi',
        cors_allowed_origins='*'
    )
    namespace = MyCustomNamespace('/test')
    sio.register_namespace(namespace)
    # The model is optional at start-up; it can be uploaded later over HTTP.
    if CONFIG and MODEL:
        namespace.loadModel(CONFIG, MODEL)

    app_socketio = socketio.ASGIApp(
        sio,
        other_asgi_app=app_fastapi,
        static_files={
            '': '../frontend/dist',
            '/': '../frontend/dist/index.html',
        }
    )

    def _safe_upload_name(raw_name):
        """Reduce a client-supplied file name to its final path component.

        Raises:
            ValueError: if nothing usable is left (empty, "." or "..").
        """
        candidate = os.path.basename((raw_name or "").replace("\\", "/"))
        if candidate in ("", ".", ".."):
            raise ValueError(f"invalid upload file name: {raw_name!r}")
        return candidate

    @app_fastapi.get("/api/hello")
    async def index():
        """Simple liveness endpoint."""
        return {"result": "Index"}

    @app_fastapi.post("/api/uploadfile/model")
    async def upload_file(configFile: UploadFile = File(...), modelFile: UploadFile = File(...)):
        """Store the uploaded config and model in the working directory and load them."""
        if configFile and modelFile:
            # The file names come from the client: strip directory components
            # so an upload can never be written outside the working directory.
            try:
                config_name = _safe_upload_name(configFile.filename)
                model_name = _safe_upload_name(modelFile.filename)
            except ValueError as e:
                return {"Error": str(e)}

            for upload, target_name in ((modelFile, model_name), (configFile, config_name)):
                # `with` closes the destination even if the copy fails.
                with open(os.path.join(".", target_name), 'wb+') as destination:
                    shutil.copyfileobj(upload.file, destination)

            namespace.loadModel(config_name, model_name)
            return {"uploaded files": f"{config_name}, {model_name} "}
        return {"Error": "uploaded file is not found."}

    @app_fastapi.post("/test")
    async def post_test(voice: VoiceModel):
        """Convert base64-encoded 16-bit PCM and return the result base64-encoded.

        On any failure the exception text is returned as the response body.
        """
        try:
            # print("POST REQUEST PROCESSING....")
            gpu = voice.gpu
            srcId = voice.srcId
            dstId = voice.dstId
            timestamp = voice.timestamp
            buffer = voice.buffer
            wav = base64.b64decode(buffer)

            # An empty payload falls back to the bundled dummy clip. The
            # previous test (`wav == 0`) compared bytes with an int and could
            # never be true.
            if len(wav) == 0:
                samplerate, data = read("dummy.wav")
                unpackedData = data
            else:
                unpackedData = np.array(struct.unpack('<%sh' % (len(wav) // struct.calcsize('<h')), wav))
                # Make sure the dump directory exists before writing into it.
                os.makedirs("logs", exist_ok=True)
                write("logs/received_data.wav", 24000, unpackedData.astype(np.int16))

            changedVoice = namespace.changeVoice(gpu, srcId, dstId, timestamp, unpackedData)
            changedVoiceBase64 = base64.b64encode(changedVoice).decode('utf-8')

            data = {
                "gpu": gpu,
                "srcId": srcId,
                "dstId": dstId,
                "timestamp": timestamp,
                "changedVoiceBase64": changedVoiceBase64
            }
            json_compatible_item_data = jsonable_encoder(data)
            return JSONResponse(content=json_compatible_item_data)
        except Exception as e:
            print("REQUEST PROCESSING!!!! EXCEPTION!!!", e)
            print(traceback.format_exc())
            return str(e)
if __name__ == '__mp_main__':
    printMessage(f"PHASE2:{__name__}", level=2)

if __name__ == '__main__':
    # Launcher phase: parse arguments, print connection hints, prepare TLS
    # material if requested, then start uvicorn on this module's app.
    printMessage(f"PHASE1:{__name__}", level=2)
    parser = setupArgParser()
    args = parser.parse_args()
    PORT = args.p
    CONFIG = args.c
    MODEL = args.m

    printMessage(f"Start MMVC SocketIO Server", level=0)
    printMessage(f"CONFIG:{CONFIG}, MODEL:{MODEL}", level=1)

    # Both variables are optional. `.get` leaves them as None when unset,
    # instead of raising KeyError here or NameError further down.
    EX_PORT = os.environ.get("EX_PORT")
    EX_IP = os.environ.get("EX_IP")

    if EX_PORT:
        printMessage(f"External_Port:{EX_PORT} Internal_Port:{PORT}", level=1)
    else:
        printMessage(f"Internal_Port:{PORT}", level=1)

    if EX_IP:
        printMessage(f"External_IP:{EX_IP}", level=1)

    # Prepare the HTTPS key/cert.
    key_path = None
    cert_path = None
    if args.https and args.httpsSelfSigned:
        # HTTPS with a freshly generated self-signed certificate.
        os.makedirs("./key", exist_ok=True)
        key_base_name = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        keyname = f"{key_base_name}.key"
        certname = f"{key_base_name}.cert"
        create_self_signed_cert(certname, keyname, certargs=
                                {"Country": "JP",
                                 "State": "Tokyo",
                                 "City": "Chuo-ku",
                                 "Organization": "F",
                                 "Org. Unit": "F"}, cert_dir="./key")
        key_path = os.path.join("./key", keyname)
        cert_path = os.path.join("./key", certname)
        printMessage(f"protocol: HTTPS(self-signed), key:{key_path}, cert:{cert_path}", level=1)
    elif args.https:
        # HTTPS with a user-supplied key/cert.
        key_path = args.httpsKey
        cert_path = args.httpsCert
        printMessage(f"protocol: HTTPS, key:{key_path}, cert:{cert_path}", level=1)
    else:
        # Plain HTTP.
        printMessage(f"protocol: HTTP", level=1)

    # Show the addresses the user can open.
    scheme = "https" if args.https else "http"
    printMessage(f"open {scheme}://<IP>:<PORT>/ with your browser.", level=0)

    if EX_PORT and EX_IP:
        printMessage(f"In many cases it is one of the following", level=1)
        printMessage(f"{scheme}://localhost:{EX_PORT}/", level=1)
        # Per-IP URLs are listed for HTTPS only, as before.
        # NOTE(review): presumably because browsers only allow microphone
        # capture on secure origins or localhost -- confirm.
        if args.https:
            for ip in EX_IP.split():
                printMessage(f"https://{ip}:{EX_PORT}/", level=1)

    # Start the server. uvicorn re-imports this module under its file name.
    server_options = {
        "host": "0.0.0.0",
        "port": int(PORT),
        "reload": True,
        "log_level": "critical",
    }
    if args.https:
        server_options["ssl_keyfile"] = key_path
        server_options["ssl_certfile"] = cert_path

    uvicorn.run(
        f"{os.path.basename(__file__)[:-3]}:app_socketio",
        **server_options
    )

76
demo/mods/VoiceChanger.py Executable file
View File

@ -0,0 +1,76 @@
import torch
from scipy.io.wavfile import write, read
import numpy as np
import struct, traceback
import utils
import commons
from models import SynthesizerTrn
from text.symbols import symbols
from data_utils import TextAudioSpeakerLoader, TextAudioSpeakerCollate
from mel_processing import spectrogram_torch
from text import text_to_sequence, cleaned_text_to_sequence
class VoiceChanger():
    """Wraps a SynthesizerTrn checkpoint and converts audio between speakers."""

    def __init__(self, config, model):
        """Build the network described by ``config`` and load the ``model`` checkpoint."""
        self.hps = utils.get_hparams_from_file(config)
        self.net_g = SynthesizerTrn(
            len(symbols),
            self.hps.data.filter_length // 2 + 1,
            self.hps.train.segment_size // self.hps.data.hop_length,
            n_speakers=self.hps.data.n_speakers,
            **self.hps.model)
        self.net_g.eval()
        self.gpu_num = torch.cuda.device_count()
        utils.load_checkpoint(model, self.net_g, None)
        print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num})")

    def destroy(self):
        """Drop the reference to the network so it can be reclaimed."""
        del self.net_g

    def on_request(self, gpu, srcId, dstId, timestamp, wav):
        """Convert ``wav`` from speaker ``srcId`` to speaker ``dstId``.

        Args:
            gpu: CUDA device index; a negative value (or a host without CUDA
                devices) selects the CPU.
            srcId: source speaker id.
            dstId: target speaker id.
            timestamp: not used by the conversion itself.
            wav: numpy array of audio samples.

        Returns:
            numpy int16 array with the converted audio. If the conversion
            fails, the error is printed and a single zero sample is returned
            so callers always receive a valid int16 array.
        """
        unpackedData = wav

        try:
            # The conversion ignores the text input; a fixed placeholder
            # satisfies the collate function's expected tuple.
            text_norm = text_to_sequence("a", self.hps.data.text_cleaners)
            text_norm = commons.intersperse(text_norm, 0)
            text_norm = torch.LongTensor(text_norm)

            audio = torch.FloatTensor(unpackedData.astype(np.float32))
            audio_norm = audio / self.hps.data.max_wav_value
            audio_norm = audio_norm.unsqueeze(0)

            spec = spectrogram_torch(audio_norm, self.hps.data.filter_length,
                                     self.hps.data.sampling_rate, self.hps.data.hop_length,
                                     self.hps.data.win_length, center=False)
            spec = torch.squeeze(spec, 0)
            sid = torch.LongTensor([int(srcId)])

            data = (text_norm, spec, audio_norm, sid)
            data = TextAudioSpeakerCollate()([data])

            # One code path for both devices; only the target device differs.
            if gpu < 0 or self.gpu_num == 0:
                device = torch.device("cpu")
            else:
                device = torch.device("cuda", gpu)

            with torch.no_grad():
                x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [t.to(device) for t in data]
                sid_tgt1 = torch.LongTensor([dstId]).to(device)
                converted = self.net_g.to(device).voice_conversion(
                    spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)
                audio1 = (converted[0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy()
        except Exception as e:
            print("VC PROCESSING!!!! EXCEPTION!!!", e)
            print(traceback.format_exc())
            # `audio1` is not assigned on this path; return a silent sample
            # rather than failing on an unbound local below.
            return np.zeros(1, dtype=np.int16)

        # Clip before the cast: out-of-range floats would otherwise wrap around.
        int16_range = np.iinfo(np.int16)
        audio1 = np.clip(audio1, int16_range.min, int16_range.max).astype(np.int16)
        return audio1

24
demo/mods/ssl.py Executable file
View File

@ -0,0 +1,24 @@
import os
from OpenSSL import crypto
def create_self_signed_cert(certfile, keyfile, certargs, cert_dir="."):
    """Create a self-signed certificate and private key unless both already exist.

    Args:
        certfile: file name of the PEM certificate inside ``cert_dir``.
        keyfile: file name of the PEM private key inside ``cert_dir``.
        certargs: mapping with the subject fields "Country", "State", "City",
            "Organization" and "Org. Unit".
        cert_dir: directory that holds (or will hold) both files.

    Nothing is written when both files are already present.
    """
    cert_path = os.path.join(cert_dir, certfile)
    key_path = os.path.join(cert_dir, keyfile)
    if os.path.exists(cert_path) and os.path.exists(key_path):
        return

    k = crypto.PKey()
    k.generate_key(crypto.TYPE_RSA, 2048)

    cert = crypto.X509()
    subject = cert.get_subject()
    subject.C = certargs["Country"]
    subject.ST = certargs["State"]
    subject.L = certargs["City"]
    subject.O = certargs["Organization"]
    subject.OU = certargs["Org. Unit"]
    subject.CN = 'Example'
    # Use a random positive serial: certificates sharing one issuer and one
    # fixed serial are rejected by some clients as "reused issuer and serial".
    cert.set_serial_number(int.from_bytes(os.urandom(8), "big") or 1)
    cert.gmtime_adj_notBefore(0)
    cert.gmtime_adj_notAfter(315360000)  # ten years, in seconds
    cert.set_issuer(cert.get_subject())
    cert.set_pubkey(k)
    # SHA-1 signatures are deprecated and refused by current TLS stacks.
    cert.sign(k, 'sha256')

    # Context managers guarantee the files are flushed and closed.
    with open(cert_path, "wb") as cert_out:
        cert_out.write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert))
    with open(key_path, "wb") as key_out:
        key_out.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, k))

View File

@ -22,7 +22,12 @@ from mel_processing import spectrogram_torch
from text import text_to_sequence, cleaned_text_to_sequence from text import text_to_sequence, cleaned_text_to_sequence
class MyCustomNamespace(socketio.Namespace): class MyCustomNamespace(socketio.Namespace):
def __init__(self, namespace, config, model): def __init__(self, namespace):
super().__init__(namespace)
self.gpu_num = torch.cuda.device_count()
print("GPU_NUM:",self.gpu_num)
def __init__old(self, namespace, config, model):
super().__init__(namespace) super().__init__(namespace)
self.hps =utils.get_hparams_from_file(config) self.hps =utils.get_hparams_from_file(config)
self.net_g = SynthesizerTrn( self.net_g = SynthesizerTrn(
@ -36,12 +41,37 @@ class MyCustomNamespace(socketio.Namespace):
print("GPU_NUM:",self.gpu_num) print("GPU_NUM:",self.gpu_num)
utils.load_checkpoint( model, self.net_g, None) utils.load_checkpoint( model, self.net_g, None)
def loadModel(self, config, model):
self.hps =utils.get_hparams_from_file(config)
print("before DELETE:", torch.cuda.memory_allocated())
if hasattr(self, 'net_g') == True:
print("DELETE MODEL:", torch.cuda.memory_allocated())
del self.net_g
print("before load", torch.cuda.memory_allocated())
self.net_g = SynthesizerTrn(
len(symbols),
self.hps.data.filter_length // 2 + 1,
self.hps.train.segment_size // self.hps.data.hop_length,
n_speakers=self.hps.data.n_speakers,
**self.hps.model)
self.net_g.eval()
utils.load_checkpoint( model, self.net_g, None)
print(torch.cuda.memory_allocated())
print("after load", torch.cuda.memory_allocated())
def on_connect(self, sid, environ): def on_connect(self, sid, environ):
print('[{}] connet sid : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S') , sid)) print('[{}] connet sid : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S') , sid))
# print('[{}] connet env : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S') , environ)) # print('[{}] connet env : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S') , environ))
def on_load_model(self, sid, msg):
print("on_load_model")
print(msg)
pass
def on_request_message(self, sid, msg): def on_request_message(self, sid, msg):
# print("MESSGaa", msg) print("on_request_message", torch.cuda.memory_allocated())
gpu = int(msg[0]) gpu = int(msg[0])
srcId = int(msg[1]) srcId = int(msg[1])
dstId = int(msg[2]) dstId = int(msg[2])
@ -223,7 +253,17 @@ if __name__ == '__main__':
# SocketIOセットアップ # SocketIOセットアップ
sio = socketio.Server(cors_allowed_origins='*') sio = socketio.Server(cors_allowed_origins='*')
sio.register_namespace(MyCustomNamespace('/test', CONFIG, MODEL)) namespace = MyCustomNamespace('/test')
sio.register_namespace(namespace)
print("loadmodel1:")
namespace.loadModel(CONFIG, MODEL)
print("loadmodel2:")
namespace.loadModel(CONFIG, MODEL)
print("loadmodel3:")
namespace.loadModel(CONFIG, MODEL)
print("loadmodel4:")
namespace.loadModel(CONFIG, MODEL)
print("loadmodel5:")
app = socketio.WSGIApp(sio,static_files={ app = socketio.WSGIApp(sio,static_files={
'': '../frontend/dist', '': '../frontend/dist',
'/': '../frontend/dist/index.html', '/': '../frontend/dist/index.html',

View File

@ -12,32 +12,17 @@ if [[ -e ./setting.json ]]; then
echo "カスタムセッティングを使用" echo "カスタムセッティングを使用"
cp ./setting.json ../frontend/dist/assets/setting.json cp ./setting.json ../frontend/dist/assets/setting.json
else else
if [ "${TYPE}" = "SOFT_VC" ] ; then cp ../frontend/dist/assets/setting_mmvc.json ../frontend/dist/assets/setting.json
cp ../frontend/dist/assets/setting_softvc.json ../frontend/dist/assets/setting.json
elif [ "${TYPE}" = "SOFT_VC_FAST_API" ] ; then
cp ../frontend/dist/assets/setting_softvc_colab.json ../frontend/dist/assets/setting.json
else
cp ../frontend/dist/assets/setting_mmvc.json ../frontend/dist/assets/setting.json
fi
fi fi
# 起動 # 起動
if [ "${TYPE}" = "SOFT_VC" ] ; then if [ "${TYPE}" = "MMVC" ] ; then
echo "SOFT_VCを起動します"
python3 SoftVcServerSIO.py $PARAMS 2>stderr.txt
elif [ "${TYPE}" = "SOFT_VC_VERBOSE" ] ; then
echo "SOFT_VCを起動します(verbose)"
python3 SoftVcServerSIO.py $PARAMS
elif [ "${TYPE}" = "SOFT_VC_FAST_API" ] ; then
echo "SOFT_VC_FAST_APIを起動します"
python3 SoftVcServerFastAPI.py 8080 docker
elif [ "${TYPE}" = "MMVC" ] ; then
echo "MMVCを起動します" echo "MMVCを起動します"
python3 serverSIO.py $PARAMS 2>stderr.txt python3 MMVCServerSIO.py $PARAMS 2>stderr.txt
elif [ "${TYPE}" = "MMVC_VERBOSE" ] ; then elif [ "${TYPE}" = "MMVC_VERBOSE" ] ; then
echo "MMVCを起動します(verbose)" echo "MMVCを起動します(verbose)"
python3 serverSIO.py $PARAMS python3 MMVCServerSIO.py $PARAMS
fi fi

File diff suppressed because one or more lines are too long

View File

@ -1,7 +1,7 @@
#!/bin/bash #!/bin/bash
set -eu set -eu
DOCKER_IMAGE=dannadori/voice-changer:20221028_220714 DOCKER_IMAGE=dannadori/voice-changer:20221029_233016
#DOCKER_IMAGE=voice-changer #DOCKER_IMAGE=voice-changer

314
start_v0.1.sh Normal file
View File

@ -0,0 +1,314 @@
#!/bin/bash
# Launch the voice-changer Docker image in one of the modes:
#   MMVC_TRAIN | MMVC | SOFT_VC
# Every command-line argument (MODE included) is forwarded to the container.
set -eu

DOCKER_IMAGE=dannadori/voice-changer:20221028_220714
#DOCKER_IMAGE=voice-changer

# Default to an empty MODE so that, under `set -u`, a missing argument falls
# through to the usage message instead of aborting with "unbound variable".
MODE=${1:-}

### DEFAULT VAR ###
DEFAULT_EX_PORT=18888
DEFAULT_USE_GPU=on # on|off
DEFAULT_VERBOSE=off # on|off

### ENV VAR ###
EX_PORT=${EX_PORT:-${DEFAULT_EX_PORT}}
USE_GPU=${USE_GPU:-${DEFAULT_USE_GPU}}
VERBOSE=${VERBOSE:-${DEFAULT_VERBOSE}}

#echo $EX_PORT $USE_GPU $VERBOSE

### INTERNAL SETTING ###
TENSORBOARD_PORT=6006
SIO_PORT=8080

###
if [ "${MODE}" = "MMVC_TRAIN" ]; then
    # The experiment directory comes from `name`, which this script never
    # sets; require it from the environment with a readable error message.
    : "${name:?set the 'name' environment variable to the experiment directory under ./exp}"

    echo "トレーニングを開始します"

    docker run -it --gpus all --shm-size=128M \
        -v "$(pwd)/exp/${name}/dataset:/MMVC_Trainer/dataset" \
        -v "$(pwd)/exp/${name}/logs:/MMVC_Trainer/logs" \
        -v "$(pwd)/exp/${name}/filelists:/MMVC_Trainer/filelists" \
        -v "$(pwd)/vc_resources:/resources" \
        -e LOCAL_UID="$(id -u "$USER")" \
        -e LOCAL_GID="$(id -g "$USER")" \
        -e EX_IP="$(hostname -I)" \
        -e EX_PORT="${EX_PORT}" \
        -e VERBOSE="${VERBOSE}" \
        -p "${EX_PORT}:${TENSORBOARD_PORT}" "$DOCKER_IMAGE" "$@"

elif [ "${MODE}" = "MMVC" ]; then
    if [ "${USE_GPU}" = "on" ]; then
        echo "MMVCを起動します(with gpu)"

        docker run -it --gpus all --shm-size=128M \
            -v "$(pwd)/vc_resources:/resources" \
            -e LOCAL_UID="$(id -u "$USER")" \
            -e LOCAL_GID="$(id -g "$USER")" \
            -e EX_IP="$(hostname -I)" \
            -e EX_PORT="${EX_PORT}" \
            -e VERBOSE="${VERBOSE}" \
            -p "${EX_PORT}:${SIO_PORT}" "$DOCKER_IMAGE" "$@"
    else
        echo "MMVCを起動します(only cpu)"

        docker run -it --shm-size=128M \
            -v "$(pwd)/vc_resources:/resources" \
            -e LOCAL_UID="$(id -u "$USER")" \
            -e LOCAL_GID="$(id -g "$USER")" \
            -e EX_IP="$(hostname -I)" \
            -e EX_PORT="${EX_PORT}" \
            -e VERBOSE="${VERBOSE}" \
            -p "${EX_PORT}:${SIO_PORT}" "$DOCKER_IMAGE" "$@"

        # Debug variant: open a shell in the container instead of the server.
        # docker run -it --shm-size=128M \
        #     -v `pwd`/vc_resources:/resources \
        #     -e LOCAL_UID=$(id -u $USER) \
        #     -e LOCAL_GID=$(id -g $USER) \
        #     -e EX_IP="`hostname -I`" \
        #     -e EX_PORT=${EX_PORT} \
        #     -e VERBOSE=${VERBOSE} \
        #     --entrypoint="" \
        #     -p ${EX_PORT}:8080 $DOCKER_IMAGE /bin/bash
    fi

elif [ "${MODE}" = "SOFT_VC" ]; then
    if [ "${USE_GPU}" = "on" ]; then
        echo "Start Soft-vc"

        docker run -it --gpus all --shm-size=128M \
            -v "$(pwd)/vc_resources:/resources" \
            -e LOCAL_UID="$(id -u "$USER")" \
            -e LOCAL_GID="$(id -g "$USER")" \
            -e EX_IP="$(hostname -I)" \
            -e EX_PORT="${EX_PORT}" \
            -e VERBOSE="${VERBOSE}" \
            -p "${EX_PORT}:${SIO_PORT}" "$DOCKER_IMAGE" "$@"
    else
        echo "Start Soft-vc withou GPU is not supported"
    fi

else
    echo "
usage:
$0 <MODE> <params...>
MODE: select one of ['MMVC_TRAIN', 'MMVC', 'SOFT_VC']
" >&2
    # An unknown or missing MODE is an error for callers checking the status.
    exit 1
fi
# echo $EX_PORT
# echo "------"
# echo "$@"
# echo "------"
# # usage() {
# # echo "
# # usage:
# # For training
# # $0 [-t] -n <exp_name> [-b batch_size] [-r]
# # -t: トレーニングモードで実行する場合に指定してください。(train)
# # -n: トレーニングの名前です。(name)
# # -b: バッチサイズです。(batchsize)
# # -r: トレーニング再開の場合に指定してください。(resume)
# # For changing voice
# # $0 [-v] [-c config] [-m model] [-g on/off]
# # -v: ボイスチェンジャーモードで実行する場合に指定してください。(voice changer)
# # -c: トレーニングで使用したConfigのファイル名です。(config)
# # -m: トレーニング済みのモデルのファイル名です。(model)
# # -g: GPU使用/不使用。デフォルトはonなのでGPUを使う場合は指定不要。(gpu)
# # -p: port番号
# # For help
# # $0 [-h]
# # -h: show this help
# # " >&2
# # }
# # warn () {
# # echo "! ! ! $1 ! ! !"
# # exit 1
# # }
# # training_flag=false
# # name=999_exp
# # batch_size=10
# # resume_flag=false
# # voice_change_flag=false
# # config=
# # model=
# # gpu=on
# # port=8080
# # escape_flag=false
# # # オプション解析
# # while getopts tn:b:rvc:m:g:p:hx OPT; do
# # case $OPT in
# # t)
# # training_flag=true
# # ;;
# # n)
# # name="$OPTARG"
# # ;;
# # b)
# # batch_size="$OPTARG"
# # ;;
# # r)
# # resume_flag=true
# # ;;
# # v)
# # voice_change_flag=true
# # ;;
# # c)
# # config="$OPTARG"
# # ;;
# # m)
# # model="$OPTARG"
# # ;;
# # g)
# # gpu="$OPTARG"
# # ;;
# # p)
# # port="$OPTARG"
# # ;;
# # h | \?)
# # usage && exit 1
# # ;;
# # x)
# # escape_flag=true
# # esac
# # done
# # # モード解析
# # if $training_flag && $voice_change_flag; then
# # warn "-tトレーニングモード と -vボイチェンモードは同時に指定できません。"
# # elif $training_flag; then
# # echo "■■■ ト レ ー ニ ン グ モ ー ド ■■■"
# # elif $voice_change_flag; then
# # echo "■■■ ボ イ チ ェ ン モ ー ド ■■■"
# # elif $escape_flag; then
# # /bin/bash
# # else
# # warn "-tトレーニングモード と -vボイチェンモードのいずれかを指定してください。"
# # fi
# if [ "${MODE}" = "MMVC_TRAIN_INITIAL" ]; then
# echo "トレーニングを開始します"
# elif [ "${MODE}" = "MMVC" ]; then
# echo "MMVCを起動します"
# docker run -it --gpus all --shm-size=128M \
# -v `pwd`/vc_resources:/resources \
# -e LOCAL_UID=$(id -u $USER) \
# -e LOCAL_GID=$(id -g $USER) \
# -e EX_IP="`hostname -I`" \
# -e EX_PORT=${port} \
# -p ${port}:8080 $DOCKER_IMAGE -v -c ${config} -m ${model}
# elif [ "${MODE}" = "MMVC_VERBOSE" ]; then
# echo "MMVCを起動します(verbose)"
# elif [ "${MODE}" = "MMVC_CPU" ]; then
# echo "MMVCを起動します(CPU)"
# elif [ "${MODE}" = "MMVC_CPU_VERBOSE" ]; then
# echo "MMVCを起動します(CPU)(verbose)"
# elif [ "${MODE}" = "SOFT_VC" ]; then
# echo "Start Soft-vc"
# elif [ "${MODE}" = "SOFT_VC_VERBOSE" ]; then
# echo "Start Soft-vc(verbose)"
# else
# echo "
# usage:
# $0 <MODE> <params...>
# EX_PORT:
# MODE: one of ['MMVC_TRAIN', 'MMVC', 'SOFT_VC']
# For 'MMVC_TRAIN':
# $0 MMVC_TRAIN_INITIAL -n <exp_name> [-b batch_size] [-r]
# -n: トレーニングの名前です。(name)
# -b: バッチサイズです。(batchsize)
# -r: トレーニング再開の場合に指定してください。(resume)
# For 'MMVC'
# $0 MMVC [-c config] [-m model] [-g on/off] [-p port] [-v]
# -c: トレーニングで使用したConfigのファイル名です。(config)
# -m: トレーニング済みのモデルのファイル名です。(model)
# -g: GPU使用/不使用。デフォルトはonなのでGPUを使う場合は指定不要。(gpu)
# -p: Docker からExposeするport番号
# -v: verbose
# For 'SOFT_VC'
# $0 SOFT_VC [-c config] [-m model] [-g on/off]
# -p: port exposed from docker container.
# -v: verbose
# " >&2
# fi
# # if $training_flag; then
# # if $resume_flag; then
# # echo "トレーニングを再開します"
# # docker run -it --gpus all --shm-size=128M \
# # -v `pwd`/exp/${name}/dataset:/MMVC_Trainer/dataset \
# # -v `pwd`/exp/${name}/logs:/MMVC_Trainer/logs \
# # -v `pwd`/exp/${name}/filelists:/MMVC_Trainer/filelists \
# # -v `pwd`/vc_resources:/resources \
# # -e LOCAL_UID=$(id -u $USER) \
# # -e LOCAL_GID=$(id -g $USER) \
# # -p ${TENSORBOARD_PORT}:6006 $DOCKER_IMAGE -t -b ${batch_size} -r
# # else
# # echo "トレーニングを開始します"
# # docker run -it --gpus all --shm-size=128M \
# # -v `pwd`/exp/${name}/dataset:/MMVC_Trainer/dataset \
# # -v `pwd`/exp/${name}/logs:/MMVC_Trainer/logs \
# # -v `pwd`/exp/${name}/filelists:/MMVC_Trainer/filelists \
# # -v `pwd`/vc_resources:/resources \
# # -e LOCAL_UID=$(id -u $USER) \
# # -e LOCAL_GID=$(id -g $USER) \
# # -p ${TENSORBOARD_PORT}:6006 $DOCKER_IMAGE -t -b ${batch_size}
# # fi
# # fi
# # if $voice_change_flag; then
# # if [[ -z "$config" ]]; then
# # warn "コンフィグファイル(-c)を指定してください"
# # fi
# # if [[ -z "$model" ]]; then
# # warn "モデルファイル(-m)を指定してください"
# # fi
# # if [ "${gpu}" = "on" ]; then
# # echo "GPUをマウントして起動します。"
# # docker run -it --gpus all --shm-size=128M \
# # -v `pwd`/vc_resources:/resources \
# # -e LOCAL_UID=$(id -u $USER) \
# # -e LOCAL_GID=$(id -g $USER) \
# # -e EX_IP="`hostname -I`" \
# # -e EX_PORT=${port} \
# # -p ${port}:8080 $DOCKER_IMAGE -v -c ${config} -m ${model}
# # elif [ "${gpu}" = "off" ]; then
# # echo "CPUのみで稼働します。GPUは使用できません。"
# # docker run -it --shm-size=128M \
# # -v `pwd`/vc_resources:/resources \
# # -e LOCAL_UID=$(id -u $USER) \
# # -e LOCAL_GID=$(id -g $USER) \
# # -e EX_IP="`hostname -I`" \
# # -e EX_PORT=${port} \
# # -p ${port}:8080 $DOCKER_IMAGE -v -c ${config} -m ${model}
# # else
# # echo ${gpu}
# # warn "-g は onかoffで指定して下さい。"
# # fi
# # fi

View File

@ -1,4 +1,4 @@
FROM dannadori/voice-changer-internal:20221028_220538 as front FROM dannadori/voice-changer-internal:20221029_231527 as front
FROM debian:bullseye-slim as base FROM debian:bullseye-slim as base
ARG DEBIAN_FRONTEND=noninteractive ARG DEBIAN_FRONTEND=noninteractive
@ -8,7 +8,7 @@ RUN apt-get install -y python3-pip git
RUN apt-get install -y espeak RUN apt-get install -y espeak
RUN apt-get install -y cmake RUN apt-get install -y cmake
RUN git clone --depth 1 https://github.com/isletennos/MMVC_Trainer.git -b v1.3.1.3 #RUN git clone --depth 1 https://github.com/isletennos/MMVC_Trainer.git -b v1.3.1.3
RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113 RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
@ -24,17 +24,20 @@ RUN pip install tqdm==4.64.0
RUN pip install retry==0.9.2 RUN pip install retry==0.9.2
RUN pip install psutil==5.9.1 RUN pip install psutil==5.9.1
RUN pip install python-socketio==5.7.1 RUN pip install python-socketio==5.7.1
RUN pip install eventlet==0.33.1 RUN pip install matplotlib==3.5.3
RUN pip install fastapi==0.85.0
RUN pip install python-multipart==0.0.5
RUN pip install uvicorn==0.18.3
RUN pip install websockets==10.4
RUN pip install pyOpenSSL==22.0.0
RUN pip install pyopenjtalk==0.2.0 RUN pip install pyopenjtalk==0.2.0
RUN pip install tensorboard==2.10.0 RUN pip install tensorboard==2.10.0
RUN pip install matplotlib==3.5.3
RUN pip install pyOpenSSL==22.0.0 # WORKDIR /MMVC_Trainer/monotonic_align
# RUN cythonize -3 -i core.pyx \
WORKDIR /MMVC_Trainer/monotonic_align # && mv core.cpython-39-x86_64-linux-gnu.so monotonic_align/
RUN cythonize -3 -i core.pyx \
&& mv core.cpython-39-x86_64-linux-gnu.so monotonic_align/
FROM debian:bullseye-slim FROM debian:bullseye-slim
@ -64,12 +67,11 @@ COPY --from=front --chmod=777 /voice-changer-internal/frontend/dist /voice-chang
COPY --from=front --chmod=777 /voice-changer-internal/voice-change-service /voice-changer-internal/voice-change-service COPY --from=front --chmod=777 /voice-changer-internal/voice-change-service /voice-changer-internal/voice-change-service
RUN chmod 0777 /voice-changer-internal/voice-change-service RUN chmod 0777 /voice-changer-internal/voice-change-service
##### Soft VC # ##### Soft VC
COPY --from=front /hubert /hubert # COPY --from=front /hubert /hubert
COPY --from=front /acoustic-model /acoustic-model # COPY --from=front /acoustic-model /acoustic-model
COPY --from=front /hifigan /hifigan # COPY --from=front /hifigan /hifigan
# COPY --from=front /models /models
COPY --from=front /models /models
ENTRYPOINT ["/bin/bash", "setup.sh"] ENTRYPOINT ["/bin/bash", "setup.sh"]