mirror of
https://github.com/w-okada/voice-changer.git
synced 2025-02-02 16:23:58 +03:00
init
This commit is contained in:
parent
2d3a59e453
commit
e6e33a8d02
1
.gitignore
vendored
1
.gitignore
vendored
@ -0,0 +1 @@
|
|||||||
|
dummy
|
86
README.md
Normal file
86
README.md
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
|
||||||
|
# 使用方法
|
||||||
|
## 前提
|
||||||
|
## Docker
|
||||||
|
Dockerを使えるようにしておいてください。
|
||||||
|
WindowsはWSL2上で使えるようにしておいてください。
|
||||||
|
|
||||||
|
トレーニング時には、DockerコンテナからGPUが見えるようにしておいてください。
|
||||||
|
```
|
||||||
|
$ docker run --gpus all --rm nvidia/cuda nvidia-smi
|
||||||
|
```
|
||||||
|
を実行して
|
||||||
|
```
|
||||||
|
Sun Sep 15 22:40:52 2019
|
||||||
|
+-----------------------------------------------------------------------------+
|
||||||
|
| NVIDIA-SMI 430.26 Driver Version: 430.26 CUDA Version: 10.2 |
|
||||||
|
|-------------------------------+----------------------+----------------------+
|
||||||
|
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
|
||||||
|
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|
||||||
|
|===============================+======================+======================|
|
||||||
|
| 0 GeForce GTX 106... Off | 00000000:01:00.0 On | N/A |
|
||||||
|
| 38% 32C P8 6W / 120W | 2MiB / 3016MiB | 0% Default |
|
||||||
|
+-------------------------------+----------------------+----------------------+
|
||||||
|
|
||||||
|
+-----------------------------------------------------------------------------+
|
||||||
|
| Processes: GPU Memory |
|
||||||
|
| GPU PID Type Process name Usage |
|
||||||
|
|=============================================================================|
|
||||||
|
| No running processes found |
|
||||||
|
+-----------------------------------------------------------------------------+
|
||||||
|
```
|
||||||
|
こんな感じの出力が出ればOKです。
|
||||||
|
|
||||||
|
ボイスチェンジ時にはGPUは必須ではありません。GPUがあれば、より高速にボイスチェンジできるかもしれません。
|
||||||
|
|
||||||
|
## トレーニングデータの準備
|
||||||
|
|
||||||
|
### Datasetの中身
|
||||||
|
```
|
||||||
|
$ ls dataset -l
|
||||||
|
合計 1656692
|
||||||
|
drwxr-xr-x 4 wataru wataru 4096 8月 22 14:31 00_myvoice
|
||||||
|
-rwx------ 1 wataru wataru 57620200 8月 22 14:18 1225_zundamon.zip
|
||||||
|
-rwx------ 1 wataru wataru 72992810 8月 22 14:18 344_tsumugi.zip
|
||||||
|
-rwx------ 1 wataru wataru 55275760 8月 22 14:18 459_methane.zip
|
||||||
|
-rwx------ 1 wataru wataru 72295236 8月 22 14:18 912_sora.zip
|
||||||
|
|
||||||
|
$ ls dataset/00_myvoice/ -l
|
||||||
|
合計 40
|
||||||
|
drwxr-xr-x 2 wataru wataru 20480 8月 22 14:32 text
|
||||||
|
drwxr-xr-x 2 wataru wataru 20480 8月 22 14:31 wav
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## 起動と実行
|
||||||
|
|
||||||
|
```
|
||||||
|
# 変数設定
|
||||||
|
$ EXP_NAME=001_exp
|
||||||
|
|
||||||
|
# テスト用フォルダ作成
|
||||||
|
$ sh template.sh $EXP_NAME
|
||||||
|
|
||||||
|
$ docker run -it --gpus all --shm-size=2g \
|
||||||
|
-v `pwd`/exp/${EXP_NAME}/dataset:/MMVC_Trainer/dataset \
|
||||||
|
-v `pwd`/exp/${EXP_NAME}/logs:/MMVC_Trainer/logs \
|
||||||
|
-v `pwd`/exp/${EXP_NAME}/filelists:/MMVC_Trainer/filelists \
|
||||||
|
-p 6008:6006 mmvc_trainer_docker
|
||||||
|
```
|
||||||
|
|
||||||
|
# ビルド
|
||||||
|
## 前提
|
||||||
|
このリポジトリではnodeを使っていませんが、ビルドスクリプト呼び出しにnpmを使用しています。
|
||||||
|
npmをインストールしておいてください。
|
||||||
|
|
||||||
|
https://nodejs.org/ja/download/
|
||||||
|
|
||||||
|
## Docker
|
||||||
|
Dockerを使えるようにしておいてください。
|
||||||
|
WindowsはWSL2上で使えるようにしておいてください。
|
||||||
|
|
||||||
|
## ビルド実行
|
||||||
|
```
|
||||||
|
$ npm run build:docker
|
||||||
|
```
|
||||||
|
|
2
dataset/.gitignore
vendored
Normal file
2
dataset/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
*
|
||||||
|
!.gitignore
|
2
exp/.gitignore
vendored
Normal file
2
exp/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
*
|
||||||
|
!.gitignore
|
21
package.json
Normal file
21
package.json
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
{
|
||||||
|
"name": "voice-changer",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "",
|
||||||
|
"main": "index.js",
|
||||||
|
"scripts": {
|
||||||
|
"build:docker": "date +%Y%m%d%H%M%S > trainer/dummy && DOCKER_BUILDKIT=1 docker build --ssh default -f trainer/Dockerfile trainer/ -t mmvc_trainer_docker",
|
||||||
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
|
},
|
||||||
|
"repository": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "git+https://github.com/w-okada/voice-changer.git"
|
||||||
|
},
|
||||||
|
"keywords": [],
|
||||||
|
"author": "",
|
||||||
|
"license": "ISC",
|
||||||
|
"bugs": {
|
||||||
|
"url": "https://github.com/w-okada/voice-changer/issues"
|
||||||
|
},
|
||||||
|
"homepage": "https://github.com/w-okada/voice-changer#readme"
|
||||||
|
}
|
54
template.sh
Normal file
54
template.sh
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
#!/bin/bash
#
# Prepare the working tree for one training experiment under exp/<EXP_NAME>/.
#
# Usage: template.sh EXP_NAME
#
# Steps:
#   (A) create exp/<EXP_NAME>/{logs,filelists,dataset} and write the
#       speaker correspondence table,
#   (B) copy the user's own recordings from dataset/00_myvoice and extract
#       the wav/ and text/ members of each target-speaker archive in
#       dataset/*.zip into exp/<EXP_NAME>/dataset/textful/.
#
# Every step is attempted even if an earlier one fails; the exit status is
# non-zero when any step failed (2 when EXP_NAME is missing).
#
# Only POSIX sh constructs are used on purpose: the README runs this file as
# `sh template.sh`, so bash-only syntax would break under dash.

set -u

## From 005_exp on: multi-speaker support. The myvoice folder hierarchy is
## one level deeper than before.
EXP_NAME=${1:-}

# Without a name every path below would collapse to "exp//...".
if [ -z "${EXP_NAME}" ]; then
  printf 'Usage: %s EXP_NAME\n' "${0##*/}" >&2
  exit 2
fi

printf '%s\n' "${EXP_NAME}"

exp_dir="exp/${EXP_NAME}"
textful_dir="${exp_dir}/dataset/textful"
status=0

# Extract the wav/ and text/ members of one speaker archive, dropping the
# directory part of each member (-j).
#   $1 - path of the zip archive
#   $2 - top-level directory name inside the archive
#   $3 - speaker directory name to create under ${textful_dir}
# The member pattern is quoted so that unzip, not the calling shell, expands
# the wildcard.
extract_speaker() {
  mkdir -p "${textful_dir}/$3/" || status=1
  for kind in wav text; do
    unzip -j "$1" "$2/${kind}/*" -d "${textful_dir}/$3/${kind}/" || status=1
  done
}

# (A) Experiment directories and speaker correspondence table.
mkdir -p "${exp_dir}/logs" || status=1
mkdir -p "${exp_dir}/filelists" || status=1

mkdir -p "${exp_dir}/dataset" || status=1
cat > "${exp_dir}/dataset/multi_speaker_correspondence.txt" <<'EOF' || status=1
00_myvoice|107
01_target_zundamon|100
02_target_tsumugi|103
03_target_metan|102
04_target_ksora|101
EOF

# (B) Create the training data.
# (B-0) my voice
mkdir -p "${textful_dir}/00_myvoice/text" || status=1
mkdir -p "${textful_dir}/00_myvoice/wav" || status=1
# The globs are intentionally left unquoted so the shell expands them.
cp dataset/00_myvoice/wav/* "${textful_dir}/00_myvoice/wav/" || status=1
cp dataset/00_myvoice/text/* "${textful_dir}/00_myvoice/text/" || status=1

# (B-1) Zundamon
extract_speaker dataset/1225_zundamon.zip 1225_zundamon 01_target_zundamon

# (B-2) Kasukabe Tsumugi
extract_speaker dataset/344_tsumugi.zip 344_tsumugi 02_target_tsumugi

# (B-3) Shikoku Metan
extract_speaker dataset/459_methane.zip 459_methane 03_target_metan

# (B-4) Kyushu Sora
extract_speaker dataset/912_sora.zip 912_sora 04_target_ksora

## Layout used up to 004_exp (kept for reference).
# echo $1
# mkdir -p ${EXP_NAME}/00_myvoice/text
# mkdir -p ${EXP_NAME}/00_myvoice/wav
# mkdir -p ${EXP_NAME}/logs
# mkdir -p ${EXP_NAME}/filelists

exit "${status}"
|
||||||
|
|
||||||
|
|
90
trainer/Dockerfile
Normal file
90
trainer/Dockerfile
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
# Stage "base": build environment. Installs the build tools, clones
# MMVC_Trainer v1.3.1.0, installs the Python dependencies and compiles the
# monotonic_align Cython extension.
FROM debian:bullseye-slim AS base

ARG DEBIAN_FRONTEND=noninteractive

# "apt-get update" and "apt-get install" share one RUN so a cached, stale
# package index layer can never be paired with a newer install layer.
RUN apt-get update \
    && apt-get install -y python3-pip git espeak cmake

RUN git clone --depth 1 https://github.com/isletennos/MMVC_Trainer.git -b v1.3.1.0

RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113

RUN pip install Cython==0.29.32
RUN pip install numpy==1.22.4
RUN pip install scipy==1.9.0
RUN pip install librosa==0.9.2
RUN pip install phonemizer==3.2.1
RUN pip install Unidecode==1.3.4
RUN pip install resampy==0.4.0

RUN pip install tqdm==4.64.0
RUN pip install retry==0.9.2
RUN pip install psutil==5.9.1
RUN pip install python-socketio==5.7.1
RUN pip install eventlet==0.33.1

RUN pip install pyopenjtalk==0.2.0
RUN pip install tensorboard==2.10.0
RUN pip install matplotlib==3.5.3

# Build the Cython extension in place and move the resulting shared object
# into the monotonic_align/ subdirectory.
WORKDIR /MMVC_Trainer/monotonic_align
RUN cythonize -3 -i core.pyx \
    && mv core.cpython-39-x86_64-linux-gnu.so monotonic_align/


# Final stage: runtime image. Takes the installed Python packages and the
# prepared MMVC_Trainer tree from "base" instead of rebuilding them.
FROM debian:bullseye-slim

RUN apt-get update \
    && apt-get install -y python3-pip espeak \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

COPY --from=base /usr/local/lib/python3.9/dist-packages /usr/local/lib/python3.9/dist-packages
COPY --from=base /MMVC_Trainer /MMVC_Trainer

# Model checkpoints taken from the build context (trainer/fine_model/).
COPY fine_model/G_180000.pth /MMVC_Trainer/fine_model/G_180000.pth
COPY fine_model/D_180000.pth /MMVC_Trainer/fine_model/D_180000.pth

WORKDIR /MMVC_Trainer
# COPY rather than ADD: setup.sh is a plain local file, so ADD's archive
# extraction and URL handling are not wanted.
COPY setup.sh /MMVC_Trainer/

# ENTRYPOINT ["/bin/bash", "setup.sh"]
# CMD [ "8", "false"]
|
||||||
|
|
||||||
|
# FROM ubuntu:focal-20220531 as base
|
||||||
|
|
||||||
|
# ARG DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
||||||
|
# RUN apt-get update && apt-get install -y libfreetype6-dev g++ cmake sox libsndfile1-dev ffmpeg flac python3-pip
|
||||||
|
# RUN apt-get install -y git emacs mlocate curl wget unzip
|
||||||
|
|
||||||
|
# RUN git clone https://github.com/isletennos/MMVC_Trainer.git
|
||||||
|
|
||||||
|
# RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
|
||||||
|
# sh Miniconda3-latest-Linux-x86_64.sh -b -p /opt/miniconda3 && \
|
||||||
|
# rm -r Miniconda3-latest-Linux-x86_64.sh
|
||||||
|
|
||||||
|
# ENV PATH /opt/miniconda3/bin:$PATH
|
||||||
|
# RUN pip install --upgrade pip && \
|
||||||
|
# conda update conda && \
|
||||||
|
# conda create -n mmvc -c defaults python=3.7 && \
|
||||||
|
# conda init && \
|
||||||
|
# echo "conda activate mmvc" >> ~/.bashrc
|
||||||
|
|
||||||
|
# SHELL ["conda", "run", "-n", "mmvc", "/bin/bash", "-c"]
|
||||||
|
|
||||||
|
# ENV CONDA_DEFAULT_ENV mmvc && \
|
||||||
|
# PATH /opt/conda/envs/mmvc/bin:$PATH
|
||||||
|
|
||||||
|
# RUN pip install pyopenjtalk
|
||||||
|
# RUN pip install Cython==0.29.21 librosa==0.8.0 matplotlib==3.3.1 numpy phonemizer==2.2.1 scipy==1.5.2 tensorboard Unidecode==1.1.1 retry tqdm resampy==0.2.2
|
||||||
|
# RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
|
||||||
|
|
||||||
|
# RUN pip install psutil
|
||||||
|
|
||||||
|
# COPY fine_model/G_180000.pth /MMVC_Trainer/fine_model/G_180000.pth
|
||||||
|
# COPY fine_model/D_180000.pth /MMVC_Trainer/fine_model/D_180000.pth
|
||||||
|
|
||||||
|
# WORKDIR /MMVC_Trainer
|
||||||
|
|
2
trainer/fine_model/.gitignore
vendored
Normal file
2
trainer/fine_model/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
*
|
||||||
|
!.gitignore
|
20
trainer/setup.sh
Normal file
20
trainer/setup.sh
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
#!/bin/bash
#
# Training driver: builds the dataset file lists, patches the trainer
# sources/config, starts TensorBoard in the background and runs training.
# All paths are relative to the current working directory.
#
# Usage: setup.sh BATCH_SIZE [RESUME]
#   BATCH_SIZE  all-digit value written into configs/train_config.json
#   RESUME      only echoed by this script; it is not used otherwise here
#
# Exits with status 2 when BATCH_SIZE is missing or not numeric.

set -u

BATCH_SIZE=${1:-}
RESUME=${2:-}

# An empty or non-numeric value would be spliced into the JSON config by the
# sed command below and leave it unparsable, so reject it up front.
case "${BATCH_SIZE}" in
  '' | *[!0-9]*)
    printf 'Usage: %s BATCH_SIZE [RESUME]\n' "${0##*/}" >&2
    exit 2
    ;;
esac

echo "batch:${BATCH_SIZE}"
echo "resume:${RESUME}"

python3 create_dataset_jtalk.py -f train_config -s 24000 -m dataset/multi_speaker_correspondence.txt

# "-i -e" instead of "-ie": GNU sed reads "-ie" as in-place editing with the
# backup suffix "e", which leaves stray train_ms.pye / train_config.jsone
# files behind.
# NOTE(review): the meaning of the 80000 -> 8000 constant in train_ms.py is
# not visible from this script - confirm against the trainer source.
sed -i -e 's/80000/8000/' train_ms.py
sed -i -e "s/\"batch_size\": 10/\"batch_size\": ${BATCH_SIZE}/" configs/train_config.json


# cd monotonic_align/ \
#  && cythonize -3 -i core.pyx \
#  && mv core.cpython-39-x86_64-linux-gnu.so monotonic_align/ \
#  && cd -

# TensorBoard runs in the background, listening on all interfaces at port
# 6006; training runs in the foreground and its status is the script's exit
# status.
python3 -m tensorboard.main --logdir logs --port 6006 --host 0.0.0.0 &
python3 train_ms.py -c configs/train_config.json -m 20220306_24000 -fg fine_model/G_180000.pth -fd fine_model/D_180000.pth
|
Loading…
Reference in New Issue
Block a user