This commit is contained in:
wataru 2022-08-22 16:08:49 +09:00
parent 2d3a59e453
commit e6e33a8d02
9 changed files with 278 additions and 0 deletions

1
.gitignore vendored
View File

@ -0,0 +1 @@
dummy

86
README.md Normal file
View File

@ -0,0 +1,86 @@
# 使用方法
## 前提
## Docker
Dockerを使えるようにしておいてください。
WindowsはWSL2上で使えるようにしておいてください。
トレーニング時にはGPUを見えるようにしておいてください。
```
$ docker run --gpus all --rm nvidia/cuda:11.3.1-base-ubuntu20.04 nvidia-smi
```
を実行して
```
Sun Sep 15 22:40:52 2019
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 430.26 Driver Version: 430.26 CUDA Version: 10.2 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 GeForce GTX 106... Off | 00000000:01:00.0 On | N/A |
| 38% 32C P8 6W / 120W | 2MiB / 3016MiB | 0% Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
```
こんな感じの出力が出ればOKです。
ボイスチェンジ時にはGPUは必須ではありません。あればより高速にボイスチェンジできる可能性があります。
## トレーニングデータの準備
### Datasetの中身
```
$ ls dataset -l
合計 1656692
drwxr-xr-x 4 wataru wataru 4096 8月 22 14:31 00_myvoice
-rwx------ 1 wataru wataru 57620200 8月 22 14:18 1225_zundamon.zip
-rwx------ 1 wataru wataru 72992810 8月 22 14:18 344_tsumugi.zip
-rwx------ 1 wataru wataru 55275760 8月 22 14:18 459_methane.zip
-rwx------ 1 wataru wataru 72295236 8月 22 14:18 912_sora.zip
$ ls dataset/00_myvoice/ -l
合計 40
drwxr-xr-x 2 wataru wataru 20480 8月 22 14:32 text
drwxr-xr-x 2 wataru wataru 20480 8月 22 14:31 wav
```
## 起動と実行
```
# 変数設定
$ EXP_NAME=001_exp
# テスト用フォルダ作成
$ sh template.sh $EXP_NAME
$ docker run -it --gpus all --shm-size=2g \
-v `pwd`/exp/${EXP_NAME}/dataset:/MMVC_Trainer/dataset \
-v `pwd`/exp/${EXP_NAME}/logs:/MMVC_Trainer/logs \
-v `pwd`/exp/${EXP_NAME}/filelists:/MMVC_Trainer/filelists \
-p 6008:6006 mmvc_trainer_docker
```
# ビルド
## 前提
このリポジトリではnodeを使っていませんが、ビルドスクリプト呼び出しにnpmを使用しています。
npmをインストールしておいてください。
https://nodejs.org/ja/download/
## Docker
Dockerを使えるようにしておいてください。
WindowsはWSL2上で使えるようにしておいてください。
## ビルド実行
```
$ npm run build:docker
```

2
dataset/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

2
exp/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

21
package.json Normal file
View File

@ -0,0 +1,21 @@
{
"name": "voice-changer",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"build:docker": "date +%Y%m%d%H%M%S > trainer/dummy && DOCKER_BUILDKIT=1 docker build --ssh default -f trainer/Dockerfile trainer/ -t mmvc_trainer_docker",
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "git+https://github.com/w-okada/voice-changer.git"
},
"keywords": [],
"author": "",
"license": "ISC",
"bugs": {
"url": "https://github.com/w-okada/voice-changer/issues"
},
"homepage": "https://github.com/w-okada/voice-changer#readme"
}

54
template.sh Normal file
View File

@ -0,0 +1,54 @@
#!/bin/bash
# template.sh — create the working tree for one experiment under exp/<EXP_NAME>
# and populate it with training data:
#   - the user's own recordings from dataset/00_myvoice
#   - four target speakers extracted from the dataset/*.zip archives
# Usage: sh template.sh <EXP_NAME>
## Since 005_exp: multi-speaker support; the myvoice folder hierarchy is one level deeper.
set -eu

# Abort with a usage message instead of silently creating "exp//" when $1 is missing.
EXP_NAME=${1:?usage: template.sh EXP_NAME}
echo "${EXP_NAME}"

# (A) Directory skeleton and the speaker-id mapping file read by the trainer.
mkdir -p "exp/${EXP_NAME}/logs"
mkdir -p "exp/${EXP_NAME}/filelists"
mkdir -p "exp/${EXP_NAME}/dataset"
{
  echo "00_myvoice|107"
  echo "01_target_zundamon|100"
  echo "02_target_tsumugi|103"
  echo "03_target_metan|102"
  echo "04_target_ksora|101"
} > "exp/${EXP_NAME}/dataset/multi_speaker_correspondence.txt"

# (B) Training data
# (B-0) my voice
mkdir -p "exp/${EXP_NAME}/dataset/textful/00_myvoice/text"
mkdir -p "exp/${EXP_NAME}/dataset/textful/00_myvoice/wav"
cp dataset/00_myvoice/wav/* "exp/${EXP_NAME}/dataset/textful/00_myvoice/wav/"
cp dataset/00_myvoice/text/* "exp/${EXP_NAME}/dataset/textful/00_myvoice/text/"

# Member globs are quoted so that unzip — not the shell — matches archive entries.
# (B-1) Zundamon
mkdir -p "exp/${EXP_NAME}/dataset/textful/01_target_zundamon/"
unzip -j dataset/1225_zundamon.zip '1225_zundamon/wav/*' -d "exp/${EXP_NAME}/dataset/textful/01_target_zundamon/wav/"
unzip -j dataset/1225_zundamon.zip '1225_zundamon/text/*' -d "exp/${EXP_NAME}/dataset/textful/01_target_zundamon/text/"
# (B-2) Kasukabe Tsumugi
mkdir -p "exp/${EXP_NAME}/dataset/textful/02_target_tsumugi/"
unzip -j dataset/344_tsumugi.zip '344_tsumugi/wav/*' -d "exp/${EXP_NAME}/dataset/textful/02_target_tsumugi/wav/"
unzip -j dataset/344_tsumugi.zip '344_tsumugi/text/*' -d "exp/${EXP_NAME}/dataset/textful/02_target_tsumugi/text/"
# (B-3) Shikoku Metan
mkdir -p "exp/${EXP_NAME}/dataset/textful/03_target_metan/"
unzip -j dataset/459_methane.zip '459_methane/wav/*' -d "exp/${EXP_NAME}/dataset/textful/03_target_metan/wav/"
unzip -j dataset/459_methane.zip '459_methane/text/*' -d "exp/${EXP_NAME}/dataset/textful/03_target_metan/text/"
# (B-4) Kyushu Sora
mkdir -p "exp/${EXP_NAME}/dataset/textful/04_target_ksora/"
unzip -j dataset/912_sora.zip '912_sora/wav/*' -d "exp/${EXP_NAME}/dataset/textful/04_target_ksora/wav/"
unzip -j dataset/912_sora.zip '912_sora/text/*' -d "exp/${EXP_NAME}/dataset/textful/04_target_ksora/text/"

## Up to 004_exp (retained for reference).
# echo $1
# mkdir -p ${EXP_NAME}/00_myvoice/text
# mkdir -p ${EXP_NAME}/00_myvoice/wav
# mkdir -p ${EXP_NAME}/logs
# mkdir -p ${EXP_NAME}/filelists

90
trainer/Dockerfile Normal file
View File

@ -0,0 +1,90 @@
# Multi-stage build for the MMVC trainer image.
# Stage 1 ("base"): install build tooling, clone the trainer, install Python
# dependencies, and compile the monotonic_align Cython extension.
FROM debian:bullseye-slim as base
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update
RUN apt-get install -y python3-pip git
RUN apt-get install -y espeak
RUN apt-get install -y cmake
# Shallow clone pinned to trainer release v1.3.1.0.
RUN git clone --depth 1 https://github.com/isletennos/MMVC_Trainer.git -b v1.3.1.0
# PyTorch wheels built against CUDA 11.3.
RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
RUN pip install Cython==0.29.32
RUN pip install numpy==1.22.4
RUN pip install scipy==1.9.0
RUN pip install librosa==0.9.2
RUN pip install phonemizer==3.2.1
RUN pip install Unidecode==1.3.4
RUN pip install resampy==0.4.0
RUN pip install tqdm==4.64.0
RUN pip install retry==0.9.2
RUN pip install psutil==5.9.1
RUN pip install python-socketio==5.7.1
RUN pip install eventlet==0.33.1
RUN pip install pyopenjtalk==0.2.0
RUN pip install tensorboard==2.10.0
RUN pip install matplotlib==3.5.3
# Compile the monotonic_align extension in place; the resulting CPython 3.9
# shared object is moved to where the trainer imports it from.
WORKDIR /MMVC_Trainer/monotonic_align
RUN cythonize -3 -i core.pyx \
&& mv core.cpython-39-x86_64-linux-gnu.so monotonic_align/
# Stage 2: slim runtime image — only python3-pip/espeak plus artifacts copied
# from the builder stage above; apt caches are cleaned to keep the image small.
FROM debian:bullseye-slim
RUN apt-get update \
&& apt-get install -y python3-pip espeak\
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
COPY --from=base /usr/local/lib/python3.9/dist-packages /usr/local/lib/python3.9/dist-packages
COPY --from=base /MMVC_Trainer /MMVC_Trainer
# Fine-tuning checkpoints must exist in the build context's fine_model/
# directory at build time (that directory is gitignored — presumably the
# weights are obtained separately; confirm with the project docs).
COPY fine_model/G_180000.pth /MMVC_Trainer/fine_model/G_180000.pth
COPY fine_model/D_180000.pth /MMVC_Trainer/fine_model/D_180000.pth
WORKDIR /MMVC_Trainer
ADD /setup.sh /MMVC_Trainer/
# Entry point is currently left to `docker run`; the commented lines show the
# intended invocation of setup.sh (batch size 8, resume=false).
# ENTRYPOINT ["/bin/bash", "setup.sh"]
# CMD [ "8", "false"]
# --- Retained earlier Ubuntu/conda-based build, kept for reference ---
# FROM ubuntu:focal-20220531 as base
# ARG DEBIAN_FRONTEND=noninteractive
# RUN apt-get update && apt-get install -y libfreetype6-dev g++ cmake sox libsndfile1-dev ffmpeg flac python3-pip
# RUN apt-get install -y git emacs mlocate curl wget unzip
# RUN git clone https://github.com/isletennos/MMVC_Trainer.git
# RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
# sh Miniconda3-latest-Linux-x86_64.sh -b -p /opt/miniconda3 && \
# rm -r Miniconda3-latest-Linux-x86_64.sh
# ENV PATH /opt/miniconda3/bin:$PATH
# RUN pip install --upgrade pip && \
# conda update conda && \
# conda create -n mmvc -c defaults python=3.7 && \
# conda init && \
# echo "conda activate mmvc" >> ~/.bashrc
# SHELL ["conda", "run", "-n", "mmvc", "/bin/bash", "-c"]
# ENV CONDA_DEFAULT_ENV mmvc && \
# PATH /opt/conda/envs/mmvc/bin:$PATH
# RUN pip install pyopenjtalk
# RUN pip install Cython==0.29.21 librosa==0.8.0 matplotlib==3.3.1 numpy phonemizer==2.2.1 scipy==1.5.2 tensorboard Unidecode==1.1.1 retry tqdm resampy==0.2.2
# RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
# RUN pip install psutil
# COPY fine_model/G_180000.pth /MMVC_Trainer/fine_model/G_180000.pth
# COPY fine_model/D_180000.pth /MMVC_Trainer/fine_model/D_180000.pth
# WORKDIR /MMVC_Trainer

2
trainer/fine_model/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

20
trainer/setup.sh Normal file
View File

@ -0,0 +1,20 @@
#!/bin/bash
# setup.sh — prepare an MMVC training run inside the container:
# build the dataset file lists, patch the training config/script, then launch
# TensorBoard (background, port 6006) and fine-tuned training (foreground).
# Arguments: $1 = batch size (required), $2 = resume flag (currently only echoed).
set -eu

# Fail loudly instead of writing an empty batch_size into the JSON config.
BATCH_SIZE=${1:?batch size (argument 1) is required}
RESUME=${2-}
echo "batch:${BATCH_SIZE}"
echo "resume:${RESUME}"

python3 create_dataset_jtalk.py -f train_config -s 24000 -m dataset/multi_speaker_correspondence.txt

# NOTE: use '-i -e', not '-ie' — GNU sed parses '-ie' as -i with backup suffix
# 'e', which left stray train_ms.pye / train_config.jsone files behind.
# The 80000 -> 8000 replacement shortens an interval hard-coded in train_ms.py
# (presumably the eval/checkpoint interval — confirm against the trainer).
sed -i -e 's/80000/8000/' train_ms.py
sed -i -e "s/\"batch_size\": 10/\"batch_size\": ${BATCH_SIZE}/" configs/train_config.json

# cd monotonic_align/ \
# && cythonize -3 -i core.pyx \
# && mv core.cpython-39-x86_64-linux-gnu.so monotonic_align/ \
# && cd -

# TensorBoard in the background; training in the foreground keeps the
# container alive for the duration of the run.
python3 -m tensorboard.main --logdir logs --port 6006 --host 0.0.0.0 &
python3 train_ms.py -c configs/train_config.json -m 20220306_24000 -fg fine_model/G_180000.pth -fd fine_model/D_180000.pth