From c2efe5cba1a70faeefafe0557053217f811a5ecc Mon Sep 17 00:00:00 2001 From: wataru Date: Wed, 31 May 2023 02:26:16 +0900 Subject: [PATCH] test model --- .gitignore | 1 + server/const.py | 8 +- server/initialize.sh | 2 +- server/samples_0001.json | 139 ------------------ server/samples_0002.json | 79 ---------- server/samples_0003_o.json | 74 ---------- server/samples_0003_t.json | 74 ---------- server/voice_changer/DDSP_SVC/DDSP_SVC.py | 2 +- server/voice_changer/MMVCv13/MMVCv13.py | 2 +- server/voice_changer/MMVCv15/MMVCv15.py | 2 +- server/voice_changer/RVC/ModelSlot.py | 8 +- .../voice_changer/RVC/ModelSlotGenerator.py | 119 +++++---------- server/voice_changer/RVC/RVC.py | 97 +++++++----- server/voice_changer/RVC/RVCSettings.py | 11 +- server/voice_changer/RVC/SampleDownloader.py | 89 ++++++++--- .../RVC/onnxExporter/export2onnx.py | 6 +- server/voice_changer/RVC/pipeline/Pipeline.py | 24 +-- .../voice_changer/SoVitsSvc40/SoVitsSvc40.py | 2 +- .../SoVitsSvc40v2/SoVitsSvc40v2.py | 2 +- server/voice_changer/VoiceChanger.py | 10 +- 20 files changed, 209 insertions(+), 542 deletions(-) delete mode 100644 server/samples_0001.json delete mode 100644 server/samples_0002.json delete mode 100644 server/samples_0003_o.json delete mode 100644 server/samples_0003_t.json diff --git a/.gitignore b/.gitignore index f1ae6e37..01b486a6 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ docker/cudnn/ server/pretrain/ server/weights/ +server/model_dir/ server/weights_/ server/weights__/ server/models/ diff --git a/server/const.py b/server/const.py index bc1e97f9..694cc160 100644 --- a/server/const.py +++ b/server/const.py @@ -103,8 +103,10 @@ class ServerAudioDeviceTypes(Enum): SAMPLES_JSONS = [ # "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0001.json", # "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0002.json", - "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t.json", - "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o.json", + # "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t.json", + # "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o.json", + "https://huggingface.co/wok000/vcclient_model/raw/main/test/test_official_v1_v2.jsona", + "https://huggingface.co/wok000/vcclient_model/raw/main/test/test_ddpn_v1_v2.jsona", ] RVC_MODEL_DIRNAME = "rvc" -RVC_MAX_SLOT_NUM = 5 +RVC_MAX_SLOT_NUM = 10 diff --git a/server/initialize.sh b/server/initialize.sh index a2fdd5a9..5235dfc3 100644 --- a/server/initialize.sh +++ b/server/initialize.sh @@ -1,2 +1,2 @@ -rm -r models +rm -r model_dir rm -r pretrain \ No newline at end of file diff --git a/server/samples_0001.json b/server/samples_0001.json deleted file mode 100644 index 78b0b415..00000000 --- a/server/samples_0001.json +++ /dev/null @@ -1,139 +0,0 @@ -{ - "RVC": [ - { - "id": "KikotoKurage", - "lang": "ja-JP", - "tag": ["torch", "test"], - "name": "黄琴海月", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/kikoto_kurage_48k_256/kikoto_kurage.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/kikoto_kurage_48k_256/added_IVF2997_Flat_nprobe_11.index.bin", - "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/kikoto_kurage_48k_256/total_fea.npy", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/kikoto_kurage_48k_256/term_of_use.txt", - "credit": "黄琴海月", - "description": "", - "sampleRate": 48000, - "modelType": "webui_v1", - "f0": true - }, - { - "id": "KikotoMahiro", - "lang": "ja-JP", - "tag": ["torch", "test"], - "name": "黄琴まひろ", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/kikoto_mahiro_48k_256/kikoto_mahiro.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/kikoto_mahiro_48k_256/added_IVF4833_Flat_nprobe_12.index.bin", - "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/kikoto_mahiro_48k_256/total_fea.npy", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/kikoto_mahiro_48k_256/term_of_use.txt", - "credit": "黄琴まひろ", - "description": "", - "sampleRate": 48000, - "modelType": "webui_v1", - "f0": true - }, - { - "id": "TokinaShigure", - "lang": "ja-JP", - "tag": ["torch", "test"], - "name": "刻鳴時雨", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tokina_sigure/Shigure.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tokina_sigure/added_IVF1572_Flat_nprobe_9.index.bin", - "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tokina_sigure/total_fea.npy", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/tokina_sigure/term_of_use.txt", - "credit": "刻鳴時雨", - "description": "https://huggingface.co/yasyune/Shigure_Tokina_RVC", - "sampleRate": 40000, - "modelType": "webui_v1", - "f0": true - }, - { - "id": "Amitaro", - "lang": "ja-JP", - "tag": ["torch", "test"], - "name": "あみたろ", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_contentvec_256/amitaro48k-100.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_contentvec_256/amitaro48k.0.index.bin", - "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_contentvec_256/amitaro48k.0.big.npy", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/amitaro_contentvec_256/term_of_use.txt", - "credit": "あみたろ", - "description": "", - "sampleRate": 48000, - "modelType": "webui_v1", - "f0": true - }, - { - "id": "Amitaro_768d", - "lang": "ja-JP", - "tag": ["torch", "test"], - "name": "あみたろ(768d)", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_default/amitaro_hubertjp_768_def-100.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_default/amitaro_hubertjp_768_def.0.index.bin", - "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_default/amitaro_hubertjp_768_def.0.big.npy", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/amitaro_hubertjp_768_default/term_of_use.txt", - "credit": "あみたろ", - "description": "rinna hubertjpを使用した768次元埋め込み版。デフォルトの事前学習モデルを使用", - "sampleRate": 48000, - "modelType": "webui_v1", - "f0": true - }, - { - "id": "Amitaro_768n", - "lang": "ja-JP", - "tag": ["torch", "test"], - "name": "あみたろ(768n)", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_nadare/amitaro_hubert_jp-100.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_nadare/amitaro_hubert_jp.0.index.bin", - "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_nadare/amitaro_hubert_jp.0.big.npy", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/amitaro_hubertjp_768_nadare/term_of_use.txt", - "credit": "あみたろ", - "description": "rinna hubertjpを使用した768次元埋め込み版。nadare様作成の事前学習モデルを使用", - "sampleRate": 48000, - "modelType": "webui_v1", - "f0": true - }, - { - "id": "Amitaro_768t", - "lang": "ja-JP", - "tag": ["torch", "test"], - "name": "あみたろ(768t)", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_tylor/amitaro_hubertjp_768_tylor-100.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_tylor/amitaro_hubertjp_768_tylor.0.index.bin", - "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/amitaro_hubertjp_768_tylor/amitaro_hubertjp_768_tylor.0.big.npy", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/amitaro_hubertjp_768_tylor/term_of_use.txt", - "credit": "あみたろ", - "description": "rinna hubertjpを使用した768次元埋め込み版。tylor様作成の事前学習モデルを使用", - "sampleRate": 48000, - "modelType": "webui_v1", - "f0": true - }, - { - "id": "Tsukuyomi-chan_768d", - "lang": "ja-JP", - "tag": ["torch", "test"], - "name": "つくよみちゃん(768d)", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tsukuyomi-chan_hubertjp_768_default/tsukuyomi_hubertjp_768_def-100.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tsukuyomi-chan_hubertjp_768_default/tsukuyomi_hubertjp_768_def.0.index.bin", - "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tsukuyomi-chan_hubertjp_768_default/tsukuyomi_hubertjp_768_def.0.big.npy", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/tsukuyomi-chan_hubertjp_768_default/term_of_use.txt", - "credit": "つくよみちゃん", - "description": "rinna hubertjpを使用した768次元埋め込み版。デフォルトの事前学習モデルを使用", - "sampleRate": 48000, - "modelType": "webui_v1", - "f0": true - }, - { - "id": "Tsukuyomi-chan", - "lang": "ja-JP", - "tag": ["torch", "test"], - "name": "つくよみちゃん", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tsukuyomi_contentvec_256/tsukuyomi48k-100.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tsukuyomi_contentvec_256/tsukuyomi48k.0.index.bin", - "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc/tsukuyomi_contentvec_256/tsukuyomi48k.0.big.npy", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc/tsukuyomi_contentvec_256/term_of_use.txt", - "credit": "つくよみちゃん", - "description": "", - "sampleRate": 48000, - "modelType": "webui_v1", - "f0": true - } - ] -} diff --git a/server/samples_0002.json b/server/samples_0002.json deleted file mode 100644 index c911ae61..00000000 --- a/server/samples_0002.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "RVC": [ - { - "id": "KikotoKurage_v2", - "lang": "ja-JP", - "tag": ["torch", "test"], - "name": "黄琴海月_v2", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/kikoto_kurage/kikoto_kurage_v2_40k_e100.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/kikoto_kurage/added_IVF5181_Flat_nprobe_1_v2.index.bin", - "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/kikoto_kurage/total_fea.npy", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc_v2_alpha/kikoto_kurage/terms_of_use.txt", - "credit": "黄琴海月", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - }, - { - "id": "KikotoMahiro_v2", - "lang": "ja-JP", - "tag": ["torch", "test"], - "name": "黄琴まひろ_v2", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/kikoto_mahiro/kikoto_mahiro_v2_40k.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/kikoto_mahiro/added_IVF6881_Flat_nprobe_1_v2.index.bin", - "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/kikoto_mahiro/total_fea.npy", - "termsOfUseUrl": "", - "credit": "黄琴まひろ", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - }, - { - "id": "TokinaShigure_v2", - "lang": "ja-JP", - "tag": ["torch", "test"], - "name": "刻鳴時雨_v2", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/tokina_shigure/tokina_shigure_v2_40k_e100.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/tokina_shigure/added_IVF2736_Flat_nprobe_1_v2.index.bin", - "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/tokina_shigure/total_fea.npy", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc_v2_alpha/tokina_shigure/terms_of_use.txt", - "credit": "刻鳴時雨", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - }, - { - "id": "Amitaro_v2", - "lang": "ja-JP", - "tag": ["torch", "test"], - "name": "あみたろ_v2", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/amitaro/amitaro_v2_40k_e100.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/amitaro/added_IVF3139_Flat_nprobe_1_v2.index.bin", - "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/amitaro/total_fea.npy", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc_v2_alpha/amitaro/terms_of_use.txt", - "credit": "あみたろ", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - }, - { - "id": "Tsukuyomi-chan_v2", - "lang": "ja-JP", - "tag": ["torch", "test"], - "name": "つくよみちゃん_v2", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/tsukuyomi-chan/tsukuyomi_v2_40k_e100.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/tsukuyomi-chan/added_IVF7852_Flat_nprobe_1_v2.index.bin", - "featureUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/tsukuyomi-chan/total_fea.npy", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/blob/main/rvc_v2_alpha/tsukuyomi-chan/terms_of_use.txt", - "credit": "つくよみちゃん", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - } - ] -} diff --git a/server/samples_0003_o.json b/server/samples_0003_o.json deleted file mode 100644 index aada67c1..00000000 --- a/server/samples_0003_o.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "RVC": [ - { - "id": "KikotoKurage_o", - "lang": "ja-JP", - "tag": ["v2", "onnx"], - "name": "黄琴海月", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/kikoto_kurage/kikoto_kurage_v2_40k_e100_simple.onnx", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/kikoto_kurage/added_IVF5181_Flat_nprobe_1_v2.index.bin", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc_v2_alpha/kikoto_kurage/terms_of_use.txt", - "credit": "黄琴海月", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - }, - { - "id": "KikotoMahiro_o", - "lang": "ja-JP", - "tag": ["v2", "onnx"], - "name": "黄琴まひろ", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/kikoto_mahiro/kikoto_mahiro_v2_40k_simple.onnx", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/kikoto_mahiro/added_IVF6881_Flat_nprobe_1_v2.index.bin", - "termsOfUseUrl": "", - "credit": "黄琴まひろ", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - }, - { - "id": "TokinaShigure_o", - "lang": "ja-JP", - "tag": ["v2", "onnx"], - "name": "刻鳴時雨", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/tokina_shigure/tokina_shigure_v2_40k_e100_simple.onnx", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/tokina_shigure/added_IVF2736_Flat_nprobe_1_v2.index.bin", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc_v2_alpha/tokina_shigure/terms_of_use.txt", - "credit": "刻鳴時雨", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - }, - { - "id": "Amitaro_o", - "lang": "ja-JP", - "tag": ["v2", "onnx"], - "name": "あみたろ", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/amitaro/amitaro_v2_40k_e100_simple.onnx", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/amitaro/added_IVF3139_Flat_nprobe_1_v2.index.bin", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc_v2_alpha/amitaro/terms_of_use.txt", - "credit": "あみたろ", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - }, - { - "id": "Tsukuyomi-chan_o", - "lang": "ja-JP", - "tag": ["v2", "onnx"], - "name": "つくよみちゃん", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/tsukuyomi-chan/tsukuyomi_v2_40k_e100_simple.onnx", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/tsukuyomi-chan/added_IVF7852_Flat_nprobe_1_v2.index.bin", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/blob/main/rvc_v2_alpha/tsukuyomi-chan/terms_of_use.txt", - "credit": "つくよみちゃん", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - } - ] -} diff --git a/server/samples_0003_t.json b/server/samples_0003_t.json deleted file mode 100644 index 40116d5a..00000000 --- a/server/samples_0003_t.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "RVC": [ - { - "id": "KikotoKurage_t", - "lang": "ja-JP", - "tag": ["v2", "torch"], - "name": "黄琴海月", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/kikoto_kurage/kikoto_kurage_v2_40k_e100.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/kikoto_kurage/added_IVF5181_Flat_nprobe_1_v2.index.bin", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc_v2_alpha/kikoto_kurage/terms_of_use.txt", - "credit": "黄琴海月", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - }, - { - "id": "KikotoMahiro_t", - "lang": "ja-JP", - "tag": ["v2", "torch"], - "name": "黄琴まひろ", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/kikoto_mahiro/kikoto_mahiro_v2_40k.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/kikoto_mahiro/added_IVF6881_Flat_nprobe_1_v2.index.bin", - "termsOfUseUrl": "", - "credit": "黄琴まひろ", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - }, - { - "id": "TokinaShigure_t", - "lang": "ja-JP", - "tag": ["v2", "torch"], - "name": "刻鳴時雨", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/tokina_shigure/tokina_shigure_v2_40k_e100.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/tokina_shigure/added_IVF2736_Flat_nprobe_1_v2.index.bin", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc_v2_alpha/tokina_shigure/terms_of_use.txt", - "credit": "刻鳴時雨", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - }, - { - "id": "Amitaro_t", - "lang": "ja-JP", - "tag": ["v2", "torch"], - "name": "あみたろ", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/amitaro/amitaro_v2_40k_e100.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/amitaro/added_IVF3139_Flat_nprobe_1_v2.index.bin", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/raw/main/rvc_v2_alpha/amitaro/terms_of_use.txt", - "credit": "あみたろ", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - }, - { - "id": "Tsukuyomi-chan_t", - "lang": "ja-JP", - "tag": ["v2", "torch"], - "name": "つくよみちゃん", - "modelUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/tsukuyomi-chan/tsukuyomi_v2_40k_e100.pth", - "indexUrl": "https://huggingface.co/wok000/vcclient_model/resolve/main/rvc_v2_alpha/tsukuyomi-chan/added_IVF7852_Flat_nprobe_1_v2.index.bin", - "termsOfUseUrl": "https://huggingface.co/wok000/vcclient_model/blob/main/rvc_v2_alpha/tsukuyomi-chan/terms_of_use.txt", - "credit": "つくよみちゃん", - "description": "", - "sampleRate": 40000, - "modelType": "rvc_v2", - "f0": true - } - ] -} diff --git a/server/voice_changer/DDSP_SVC/DDSP_SVC.py b/server/voice_changer/DDSP_SVC/DDSP_SVC.py index 85c4245c..b8a7b38c 100644 --- a/server/voice_changer/DDSP_SVC/DDSP_SVC.py +++ b/server/voice_changer/DDSP_SVC/DDSP_SVC.py @@ -226,7 +226,7 @@ class DDSP_SVC: try: file_path = val.__file__ if file_path.find("DDSP-SVC" + os.path.sep) >= 0: - print("remove", key, file_path) + # print("remove", key, file_path) sys.modules.pop(key) except: # type:ignore pass diff --git a/server/voice_changer/MMVCv13/MMVCv13.py b/server/voice_changer/MMVCv13/MMVCv13.py index 331a78e2..aab4f555 100644 --- a/server/voice_changer/MMVCv13/MMVCv13.py +++ b/server/voice_changer/MMVCv13/MMVCv13.py @@ -289,7 +289,7 @@ class MMVCv13: try: file_path = val.__file__ if file_path.find(remove_path + os.path.sep) >= 0: - print("remove", key, file_path) + # print("remove", key, file_path) sys.modules.pop(key) except: # type:ignore pass diff --git a/server/voice_changer/MMVCv15/MMVCv15.py b/server/voice_changer/MMVCv15/MMVCv15.py index eb7188fc..dc8ca682 100644 --- a/server/voice_changer/MMVCv15/MMVCv15.py +++ b/server/voice_changer/MMVCv15/MMVCv15.py @@ -348,7 +348,7 @@ class MMVCv15: try: file_path = val.__file__ if file_path.find(remove_path + os.path.sep) >= 0: - print("remove", key, file_path) + # print("remove", key, file_path) sys.modules.pop(key) except: # type:ignore pass diff --git a/server/voice_changer/RVC/ModelSlot.py b/server/voice_changer/RVC/ModelSlot.py index b008f943..4e8a9e5e 100644 --- a/server/voice_changer/RVC/ModelSlot.py +++ b/server/voice_changer/RVC/ModelSlot.py @@ -5,24 +5,22 @@ from dataclasses import dataclass @dataclass class ModelSlot: - # pyTorchModelFile: str = "" - # onnxModelFile: str = "" modelFile: str = "" - featureFile: str = "" indexFile: str = "" defaultTune: int = 0 defaultIndexRatio: int = 1 isONNX: bool = False - modelType: EnumInferenceTypes = EnumInferenceTypes.pyTorchRVC + modelType: str = EnumInferenceTypes.pyTorchRVC.value samplingRate: int = -1 f0: bool = True embChannels: int = 256 embOutputLayer: int = 9 useFinalProj: bool = True deprecated: bool = False - embedder: EnumEmbedderTypes = EnumEmbedderTypes.hubert + embedder: str = EnumEmbedderTypes.hubert.value name: str = "" description: str = "" credit: str = "" termsOfUseUrl: str = "" + sampleId: str = "" diff --git a/server/voice_changer/RVC/ModelSlotGenerator.py b/server/voice_changer/RVC/ModelSlotGenerator.py index 03c0c28b..d46244a0 100644 --- a/server/voice_changer/RVC/ModelSlotGenerator.py +++ b/server/voice_changer/RVC/ModelSlotGenerator.py @@ -4,55 +4,6 @@ from voice_changer.RVC.ModelSlot import ModelSlot import torch import onnxruntime import json -import os - - -def generateModelSlot(slotDir: str): - try: - modelSlot = ModelSlot() - if os.path.exists(slotDir) is False: - return modelSlot - paramFile = os.path.join(slotDir, "params.json") - with open(paramFile, "r") as f: - params = json.load(f) - - modelSlot.modelFile = os.path.join( - slotDir, os.path.basename(params["files"]["rvcModel"]) - ) - if "rvcFeature" in params["files"]: - modelSlot.featureFile = os.path.join( - slotDir, os.path.basename(params["files"]["rvcFeature"]) - ) - else: - modelSlot.featureFile = None - if "rvcIndex" in params["files"]: - modelSlot.indexFile = os.path.join( - slotDir, os.path.basename(params["files"]["rvcIndex"]) - ) - else: - modelSlot.indexFile = None - - modelSlot.defaultTune = params["defaultTune"] if "defaultTune" in params else 0 - modelSlot.defaultIndexRatio = ( - params["defaultIndexRatio"] if "defaultIndexRatio" in params else 0 - ) - modelSlot.name = params["name"] if "name" in params else None - modelSlot.description = params["description"] if "description" in params else None - modelSlot.credit = params["credit"] if "credit" in params else None - modelSlot.termsOfUseUrl = ( - params["termsOfUseUrl"] if "termsOfUseUrl" in params else None - ) - - modelSlot.isONNX = modelSlot.modelFile.endswith(".onnx") - - if modelSlot.isONNX: - _setInfoByONNX(modelSlot) - else: - _setInfoByPytorch(modelSlot) - return modelSlot - except Exception as e: - print(f"[Voice Changer] faild to generate slot: {e}") - return ModelSlot() def _setInfoByPytorch(slot: ModelSlot): @@ -65,32 +16,34 @@ def _setInfoByPytorch(slot: ModelSlot): version = cpt.get("version", "v1") if version is None or version == "v1": slot.modelType = ( - EnumInferenceTypes.pyTorchRVC + EnumInferenceTypes.pyTorchRVC.value if slot.f0 - else EnumInferenceTypes.pyTorchRVCNono + else EnumInferenceTypes.pyTorchRVCNono.value ) slot.embChannels = 256 slot.embOutputLayer = 9 slot.useFinalProj = True - slot.embedder = EnumEmbedderTypes.hubert + slot.embedder = EnumEmbedderTypes.hubert.value + print("[Voice Changer] Official Model(pyTorch) : v1") else: slot.modelType = ( - EnumInferenceTypes.pyTorchRVCv2 + EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 - else EnumInferenceTypes.pyTorchRVCv2Nono + else EnumInferenceTypes.pyTorchRVCv2Nono.value ) slot.embChannels = 768 slot.embOutputLayer = 12 slot.useFinalProj = False - slot.embedder = EnumEmbedderTypes.hubert + slot.embedder = EnumEmbedderTypes.hubert.value + print("[Voice Changer] Official Model(pyTorch) : v2") else: # DDPN RVC slot.f0 = True if cpt["f0"] == 1 else False slot.modelType = ( - EnumInferenceTypes.pyTorchWebUI + EnumInferenceTypes.pyTorchWebUI.value if slot.f0 - else EnumInferenceTypes.pyTorchWebUINono + else EnumInferenceTypes.pyTorchWebUINono.value ) slot.embChannels = cpt["config"][17] slot.embOutputLayer = ( @@ -107,30 +60,30 @@ def _setInfoByPytorch(slot: ModelSlot): and slot.embOutputLayer == 9 and slot.useFinalProj is True ): - print("[Voice Changer] DDPN Model: Original v1 like") + print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like") elif ( slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False ): - print("[Voice Changer] DDPN Model: Original v2 like") + print("[Voice Changer] DDPN Model(pyTorch): Official v2 like") else: print( - f"[Voice Changer] DDPN Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}" + f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}" ) slot.embedder = cpt["embedder_name"] if slot.embedder.endswith("768"): slot.embedder = slot.embedder[:-3] - if slot.embedder == EnumEmbedderTypes.hubert.value: - slot.embedder = EnumEmbedderTypes.hubert - elif slot.embedder == EnumEmbedderTypes.contentvec.value: - slot.embedder = EnumEmbedderTypes.contentvec - elif slot.embedder == EnumEmbedderTypes.hubert_jp.value: - slot.embedder = EnumEmbedderTypes.hubert_jp - else: - raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder") + # if slot.embedder == EnumEmbedderTypes.hubert.value: + # slot.embedder = EnumEmbedderTypes.hubert + # elif slot.embedder == EnumEmbedderTypes.contentvec.value: + # slot.embedder = EnumEmbedderTypes.contentvec + # elif slot.embedder == EnumEmbedderTypes.hubert_jp.value: + # slot.embedder = EnumEmbedderTypes.hubert_jp + # else: + # raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder") slot.samplingRate = cpt["config"][-1] @@ -170,40 +123,44 @@ def _setInfoByONNX(slot: ModelSlot): and slot.embOutputLayer == 9 and slot.useFinalProj is True ): - print("[Voice Changer] ONNX Model: Original v1 like") + print("[Voice Changer] ONNX Model: Official v1 like") elif ( slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False ): - print("[Voice Changer] ONNX Model: Original v2 like") + print("[Voice Changer] ONNX Model: Official v2 like") else: print( f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}" ) if "embedder" not in metadata: - slot.embedder = EnumEmbedderTypes.hubert - elif metadata["embedder"] == EnumEmbedderTypes.hubert.value: - slot.embedder = EnumEmbedderTypes.hubert - elif metadata["embedder"] == EnumEmbedderTypes.contentvec.value: - slot.embedder = EnumEmbedderTypes.contentvec - elif metadata["embedder"] == EnumEmbedderTypes.hubert_jp.value: - slot.embedder = EnumEmbedderTypes.hubert_jp + slot.embedder = EnumEmbedderTypes.hubert.value else: - raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder") + slot.embedder = metadata["embedder"] + # elif metadata["embedder"] == EnumEmbedderTypes.hubert.value: + # slot.embedder = EnumEmbedderTypes.hubert + # elif metadata["embedder"] == EnumEmbedderTypes.contentvec.value: + # slot.embedder = EnumEmbedderTypes.contentvec + # elif metadata["embedder"] == EnumEmbedderTypes.hubert_jp.value: + # slot.embedder = EnumEmbedderTypes.hubert_jp + # else: + # raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder") slot.f0 = metadata["f0"] slot.modelType = ( - EnumInferenceTypes.onnxRVC if slot.f0 else EnumInferenceTypes.onnxRVCNono + EnumInferenceTypes.onnxRVC.value + if slot.f0 + else EnumInferenceTypes.onnxRVCNono.value ) slot.samplingRate = metadata["samplingRate"] slot.deprecated = False except Exception as e: - slot.modelType = EnumInferenceTypes.onnxRVC + slot.modelType = EnumInferenceTypes.onnxRVC.value slot.embChannels = 256 - slot.embedder = EnumEmbedderTypes.hubert + slot.embedder = EnumEmbedderTypes.hubert.value slot.f0 = True slot.samplingRate = 48000 slot.deprecated = True diff --git a/server/voice_changer/RVC/RVC.py b/server/voice_changer/RVC/RVC.py index 5184ffcb..e92db06d 100644 --- a/server/voice_changer/RVC/RVC.py +++ b/server/voice_changer/RVC/RVC.py @@ -6,6 +6,7 @@ import numpy as np import torch import torchaudio from ModelSample import getModelSamples +from voice_changer.RVC.ModelSlot import ModelSlot from voice_changer.RVC.SampleDownloader import downloadModelFiles @@ -24,7 +25,10 @@ else: from voice_changer.RVC.modelMerger.MergeModel import merge_model from voice_changer.RVC.modelMerger.MergeModelRequest import MergeModelRequest -from voice_changer.RVC.ModelSlotGenerator import generateModelSlot +from voice_changer.RVC.ModelSlotGenerator import ( + _setInfoByONNX, + _setInfoByPytorch, +) from voice_changer.RVC.RVCSettings import RVCSettings from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager from voice_changer.utils.LoadModelParams import LoadModelParams @@ -37,7 +41,7 @@ from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager from voice_changer.RVC.pipeline.Pipeline import Pipeline from Exceptions import NoModeLoadedException -from const import RVC_MAX_SLOT_NUM, RVC_MODEL_DIRNAME, SAMPLES_JSONS, UPLOAD_DIR +from const import RVC_MODEL_DIRNAME, SAMPLES_JSONS, UPLOAD_DIR import shutil import json @@ -65,6 +69,7 @@ class RVC: self.loadSlots() print("RVC initialization: ", params) + # サンプルカタログ作成 sampleJsons: list[str] = [] for url in SAMPLES_JSONS: filename = os.path.basename(url) @@ -82,7 +87,7 @@ class RVC: self.switchModel(self.settings.modelSlotIndex) self.initialLoad = False break - self.prevVol = 0. + self.prevVol = 0.0 def getSampleInfo(self, id: str): sampleInfos = list(filter(lambda x: x.id == id, self.settings.sampleModels)) @@ -101,6 +106,7 @@ class RVC: def loadModel(self, props: LoadModelParams): target_slot_idx = props.slot params = props.params + slotInfo: ModelSlot = ModelSlot() print("loadModel", params) # サンプルが指定されたときはダウンロードしてメタデータをでっちあげる @@ -113,34 +119,43 @@ class RVC: print("[Voice Changer] sampleInfo is None") return modelPath, indexPath = downloadModelFiles(sampleInfo, useIndex) - params["files"]["rvcModel"] = modelPath + slotInfo.modelFile = modelPath if indexPath is not None: - params["files"]["rvcIndex"] = indexPath - params["credit"] = sampleInfo.credit - params["description"] = sampleInfo.description - params["name"] = sampleInfo.name - params["sampleId"] = sampleInfo.id - params["termsOfUseUrl"] = sampleInfo.termsOfUseUrl - params["sampleRate"] = sampleInfo.sampleRate - params["modelType"] = sampleInfo.modelType - params["f0"] = sampleInfo.f0 + slotInfo.indexFile = indexPath + + slotInfo.sampleId = sampleInfo.id + slotInfo.credit = sampleInfo.credit + slotInfo.description = sampleInfo.description + slotInfo.name = sampleInfo.name + slotInfo.termsOfUseUrl = sampleInfo.termsOfUseUrl + # slotInfo.samplingRate = sampleInfo.sampleRate + # slotInfo.modelType = sampleInfo.modelType + # slotInfo.f0 = sampleInfo.f0 + else: + slotInfo.modelFile = params["files"]["rvcModel"] + slotInfo.indexFile = ( + params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None + ) + + slotInfo.defaultTune = params["defaultTune"] + slotInfo.defaultIndexRatio = params["defaultIndexRatio"] + slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx") + + if slotInfo.isONNX: + _setInfoByONNX(slotInfo) + else: + _setInfoByPytorch(slotInfo) + # メタデータを見て、永続化モデルフォルダに移動させる # その際に、メタデータのファイル格納場所も書き換える slotDir = os.path.join( self.params.model_dir, RVC_MODEL_DIRNAME, str(target_slot_idx) ) os.makedirs(slotDir, exist_ok=True) - - modelDst = self.moveToModelDir(params["files"]["rvcModel"], slotDir) - params["files"]["rvcModel"] = modelDst - if "rvcFeature" in params["files"]: - featureDst = self.moveToModelDir(params["files"]["rvcFeature"], slotDir) - params["files"]["rvcFeature"] = featureDst - if "rvcIndex" in params["files"]: - indexDst = self.moveToModelDir(params["files"]["rvcIndex"], slotDir) - params["files"]["rvcIndex"] = indexDst - - json.dump(params, open(os.path.join(slotDir, "params.json"), "w")) + slotInfo.modelFile = self.moveToModelDir(slotInfo.modelFile, slotDir) + if slotInfo.indexFile is not None: + slotInfo.indexFile = self.moveToModelDir(slotInfo.indexFile, slotDir) + json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w")) self.loadSlots() # 初回のみロード(起動時にスロットにモデルがあった場合はinitialLoadはFalseになっている) @@ -156,16 +171,22 @@ class RVC: def loadSlots(self): dirname = os.path.join(self.params.model_dir, RVC_MODEL_DIRNAME) - self.settings.modelSlots = [] if not os.path.exists(dirname): return - for slot_idx in range(RVC_MAX_SLOT_NUM): + modelSlots: list[ModelSlot] = [] + for slot_idx in range(len(self.settings.modelSlots)): slotDir = os.path.join( self.params.model_dir, RVC_MODEL_DIRNAME, str(slot_idx) ) - modelSlot = generateModelSlot(slotDir) - self.settings.modelSlots.append(modelSlot) + jsonDict = os.path.join(slotDir, "params.json") + if os.path.exists(jsonDict): + jsonDict = json.load(open(os.path.join(slotDir, "params.json"))) + slotInfo = ModelSlot(**jsonDict) + else: + slotInfo = ModelSlot() + modelSlots.append(slotInfo) + self.settings.modelSlots = modelSlots def update_settings(self, key: str, val: int | float | str): if key in self.settings.intData: @@ -276,13 +297,21 @@ class RVC: convertOffset = -1 * convertSize self.audio_buffer = self.audio_buffer[convertOffset:] # 変換対象の部分だけ抽出 - audio_buffer = torch.from_numpy(self.audio_buffer).to(device=self.pipeline.device, dtype=torch.float32) + + if self.pipeline is not None: + device = self.pipeline.device + else: + device = torch.device("cpu") + + audio_buffer = torch.from_numpy(self.audio_buffer).to( + device=device, dtype=torch.float32 + ) # 出力部分だけ切り出して音量を確認。(TODO:段階的消音にする) cropOffset = -1 * (inputSize + crossfadeSize) cropEnd = -1 * (crossfadeSize) crop = audio_buffer[cropOffset:cropEnd] - vol = torch.sqrt(torch.square(crop).mean(axis=0)).detach().cpu().numpy() + vol = torch.sqrt(torch.square(crop).mean()).detach().cpu().numpy() vol = max(vol, self.prevVol * 0.0) self.prevVol = vol @@ -312,7 +341,9 @@ class RVC: if vol < self.settings.silentThreshold: return np.zeros(convertSize).astype(np.int16) - audio = torchaudio.functional.resample(audio, self.settings.modelSamplingRate, 16000, rolloff=0.99) + audio = torchaudio.functional.resample( + audio, self.settings.modelSamplingRate, 16000, rolloff=0.99 + ) repeat = 3 if half else 1 repeat *= self.settings.rvcQuality # 0 or 3 sid = 0 @@ -341,7 +372,7 @@ class RVC: def __del__(self): del self.pipeline - print("---------- REMOVING ---------------") + # print("---------- REMOVING ---------------") remove_path = os.path.join("RVC") sys.path = [x for x in sys.path if x.endswith(remove_path) is False] @@ -351,7 +382,7 @@ class RVC: try: file_path = val.__file__ if file_path.find("RVC" + os.path.sep) >= 0: - print("remove", key, file_path) + # print("remove", key, file_path) sys.modules.pop(key) except Exception: # type:ignore # print(e) diff --git a/server/voice_changer/RVC/RVCSettings.py b/server/voice_changer/RVC/RVCSettings.py index 23153558..026cf109 100644 --- a/server/voice_changer/RVC/RVCSettings.py +++ b/server/voice_changer/RVC/RVCSettings.py @@ -1,5 +1,6 @@ from dataclasses import dataclass, field from ModelSample import RVCModelSample +from const import RVC_MAX_SLOT_NUM from voice_changer.RVC.ModelSlot import ModelSlot @@ -17,15 +18,9 @@ class RVCSettings: framework: str = "PyTorch" # PyTorch or ONNX modelSlots: list[ModelSlot] = field( - default_factory=lambda: [ - ModelSlot(), # 1 - ModelSlot(), # 2 - ModelSlot(), # 3 - ModelSlot(), # 4 - ModelSlot(), # 5 - ModelSlot(), # 6(merged) - ] + default_factory=lambda: [ModelSlot() for _x in range(RVC_MAX_SLOT_NUM)] ) + sampleModels: list[RVCModelSample] = field(default_factory=lambda: []) indexRatio: float = 0 diff --git a/server/voice_changer/RVC/SampleDownloader.py b/server/voice_changer/RVC/SampleDownloader.py index a0e4daf1..867a85da 100644 --- a/server/voice_changer/RVC/SampleDownloader.py +++ b/server/voice_changer/RVC/SampleDownloader.py @@ -1,11 +1,14 @@ from concurrent.futures import ThreadPoolExecutor +from dataclasses import asdict import os from const import RVC_MODEL_DIRNAME, TMP_DIR from Downloader import download, download_no_tqdm from ModelSample import RVCModelSample, getModelSamples -from typing import Any import json +from voice_changer.RVC.ModelSlot import ModelSlot +from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch + def checkRvcModelExist(model_dir: str): rvcModelDir = os.path.join(model_dir, RVC_MODEL_DIRNAME) @@ -15,12 +18,38 @@ def checkRvcModelExist(model_dir: str): def downloadInitialSampleModels(sampleJsons: list[str], model_dir: str): + # sampleModelIds = [ + # ("TokinaShigure_o", True), + # ("KikotoMahiro_o", False), + # ("Amitaro_o", False), + # ("Tsukuyomi-chan_o", False), + # ] sampleModelIds = [ - ("TokinaShigure_o", True), - ("KikotoMahiro_o", False), - ("Amitaro_o", False), - ("Tsukuyomi-chan_o", False), + # オフィシャルモデルテスト + # ("test-official-v1-f0-48k-l9-hubert_t", True), + # ("test-official-v1-nof0-48k-l9-hubert_t", False), + # ("test-official-v2-f0-40k-l12-hubert_t", False), + # ("test-official-v2-nof0-40k-l12-hubert_t", False), + # ("test-official-v1-f0-48k-l9-hubert_o", True), + # ("test-official-v1-nof0-48k-l9-hubert_o", False), + # ("test-official-v2-f0-40k-l12-hubert_o", False), + # ("test-official-v2-nof0-40k-l12-hubert_o", False), + # DDPNモデルテスト(torch) + # ("test-ddpn-v1-f0-48k-l9-hubert_t", False), + # ("test-ddpn-v1-nof0-48k-l9-hubert_t", False), + # ("test-ddpn-v2-f0-40k-l12-hubert_t", False), + # ("test-ddpn-v2-nof0-40k-l12-hubert_t", False), + # ("test-ddpn-v2-f0-40k-l12-hubert_jp_t", False), + # ("test-ddpn-v2-nof0-40k-l12-hubert_jp_t", False), + # DDPNモデルテスト(onnx) + ("test-ddpn-v1-f0-48k-l9-hubert_o", False), + ("test-ddpn-v1-nof0-48k-l9-hubert_o", False), + ("test-ddpn-v2-f0-40k-l12-hubert_o", False), + ("test-ddpn-v2-nof0-40k-l12-hubert_o", False), + ("test-ddpn-v2-f0-40k-l12-hubert_jp_o", False), + ("test-ddpn-v2-nof0-40k-l12-hubert_jp_o", False), ] + sampleModels = getModelSamples(sampleJsons, "RVC") if sampleModels is None: return @@ -29,7 +58,6 @@ def downloadInitialSampleModels(sampleJsons: list[str], model_dir: str): slot_count = 0 line_num = 0 for initSampleId in sampleModelIds: - print(initSampleId) # 初期サンプルをサーチ match = False for sample in sampleModels: @@ -41,7 +69,8 @@ def downloadInitialSampleModels(sampleJsons: list[str], model_dir: str): continue # 検出されたら、、、 - sampleParams: Any = {"files": {}} + slotInfo: ModelSlot = ModelSlot() + # sampleParams: Any = {"files": {}} slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slot_count)) os.makedirs(slotDir, exist_ok=True) @@ -56,7 +85,7 @@ def downloadInitialSampleModels(sampleJsons: list[str], model_dir: str): "position": line_num, } ) - sampleParams["files"]["rvcModel"] = modelFilePath + slotInfo.modelFile = modelFilePath line_num += 1 if ( @@ -75,29 +104,45 @@ def downloadInitialSampleModels(sampleJsons: list[str], model_dir: str): "position": line_num, } ) - sampleParams["files"]["rvcIndex"] = indexPath + slotInfo.indexFile = indexPath line_num += 1 - sampleParams["sampleId"] = sample.id - sampleParams["defaultTune"] = 0 - sampleParams["defaultIndexRatio"] = 1 - sampleParams["credit"] = sample.credit - sampleParams["description"] = sample.description - sampleParams["name"] = sample.name - sampleParams["sampleId"] = sample.id - sampleParams["termsOfUseUrl"] = sample.termsOfUseUrl - sampleParams["sampleRate"] = sample.sampleRate - sampleParams["modelType"] = sample.modelType - sampleParams["f0"] = sample.f0 + slotInfo.sampleId = sample.id + slotInfo.credit = sample.credit + slotInfo.description = sample.description + slotInfo.name = sample.name + slotInfo.termsOfUseUrl = sample.termsOfUseUrl - jsonFilePath = os.path.join(slotDir, "params.json") - json.dump(sampleParams, open(jsonFilePath, "w")) + slotInfo.defaultTune = 0 + slotInfo.defaultIndexRatio = 1 + slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx") + + # この時点ではまだファイルはダウンロードされていない + # if slotInfo.isONNX: + # _setInfoByONNX(slotInfo) + # else: + # _setInfoByPytorch(slotInfo) + + json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w")) slot_count += 1 + # ダウンロード print("[Voice Changer] Downloading model files...") with ThreadPoolExecutor() as pool: pool.map(download, downloadParams) + # メタデータ作成 + print("[Voice Changer] Generating metadata...") + for slotId in range(slot_count): + slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slotId)) + jsonDict = json.load(open(os.path.join(slotDir, "params.json"))) + slotInfo = ModelSlot(**jsonDict) + if slotInfo.isONNX: + _setInfoByONNX(slotInfo) + else: + _setInfoByPytorch(slotInfo) + json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w")) + def downloadModelFiles(sampleInfo: RVCModelSample, useIndex: bool = True): downloadParams = [] diff --git a/server/voice_changer/RVC/onnxExporter/export2onnx.py b/server/voice_changer/RVC/onnxExporter/export2onnx.py index 0bf8b613..f4cbd74c 100644 --- a/server/voice_changer/RVC/onnxExporter/export2onnx.py +++ b/server/voice_changer/RVC/onnxExporter/export2onnx.py @@ -38,13 +38,11 @@ def export2onnx(gpu: int, modelSlot: ModelSlot): metadata = { "application": "VC_CLIENT", "version": "2", - # ↓EnumInferenceTypesのままだとシリアライズできないのでテキスト化 - "modelType": modelSlot.modelType.value, + "modelType": modelSlot.modelType, "samplingRate": modelSlot.samplingRate, "f0": modelSlot.f0, "embChannels": modelSlot.embChannels, - # ↓EnumEmbedderTypesのままだとシリアライズできないのでテキスト化 - "embedder": modelSlot.embedder.value, + "embedder": modelSlot.embedder, "embOutputLayer": modelSlot.embOutputLayer, "useFinalProj": modelSlot.useFinalProj, } diff --git a/server/voice_changer/RVC/pipeline/Pipeline.py b/server/voice_changer/RVC/pipeline/Pipeline.py index c240d1cd..15021331 100644 --- a/server/voice_changer/RVC/pipeline/Pipeline.py +++ b/server/voice_changer/RVC/pipeline/Pipeline.py @@ -81,7 +81,9 @@ class Pipeline(object): self.t_pad = self.sr * repeat self.t_pad_tgt = self.targetSR * repeat - audio_pad = F.pad(audio.unsqueeze(0), (self.t_pad, self.t_pad), mode="reflect").squeeze(0) + audio_pad = F.pad( + audio.unsqueeze(0), (self.t_pad, self.t_pad), mode="reflect" + ).squeeze(0) p_len = audio_pad.shape[0] // self.window sid = torch.tensor(sid, device=self.device).unsqueeze(0).long() @@ -102,8 +104,8 @@ class Pipeline(object): pitchf = torch.tensor( pitchf, device=self.device, dtype=torch.float ).unsqueeze(0) - except IndexError as e: - print(e) + except IndexError: + # print(e) raise NotEnoughDataExtimateF0() # tensor型調整 @@ -142,7 +144,7 @@ class Pipeline(object): k = 1 if k == 1: _, ix = self.index.search(npy, 1) - npy = self.big_npy[ix.squeeze()] + npy = self.big_npy[ix.squeeze()] else: score, ix = self.index.search(npy, k=8) weight = np.square(1 / score) @@ -171,12 +173,16 @@ class Pipeline(object): try: with torch.no_grad(): audio1 = ( - ( - torch.clip(self.inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0].to(dtype=torch.float32), -1., 1.) * 32767.5 - .5 + torch.clip( + self.inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][ + 0, 0 + ].to(dtype=torch.float32), + -1.0, + 1.0, ) - .data - .to(dtype=torch.int16) - ) + * 32767.5 + - 0.5 + ).data.to(dtype=torch.int16) except RuntimeError as e: if "HALF" in e.__str__().upper(): raise HalfPrecisionChangingException() diff --git a/server/voice_changer/SoVitsSvc40/SoVitsSvc40.py b/server/voice_changer/SoVitsSvc40/SoVitsSvc40.py index 12acb110..86a9cda7 100644 --- a/server/voice_changer/SoVitsSvc40/SoVitsSvc40.py +++ b/server/voice_changer/SoVitsSvc40/SoVitsSvc40.py @@ -468,7 +468,7 @@ class SoVitsSvc40: try: file_path = val.__file__ if file_path.find("so-vits-svc-40" + os.path.sep) >= 0: - print("remove", key, file_path) + # print("remove", key, file_path) sys.modules.pop(key) except Exception: # type:ignore pass diff --git a/server/voice_changer/SoVitsSvc40v2/SoVitsSvc40v2.py b/server/voice_changer/SoVitsSvc40v2/SoVitsSvc40v2.py index e264d2c3..9d8cf8f7 100644 --- a/server/voice_changer/SoVitsSvc40v2/SoVitsSvc40v2.py +++ b/server/voice_changer/SoVitsSvc40v2/SoVitsSvc40v2.py @@ -417,7 +417,7 @@ class SoVitsSvc40v2: try: file_path = val.__file__ if file_path.find("so-vits-svc-40v2" + os.path.sep) >= 0: - print("remove", key, file_path) + # print("remove", key, file_path) sys.modules.pop(key) except: # type:ignore pass diff --git a/server/voice_changer/VoiceChanger.py b/server/voice_changer/VoiceChanger.py index 411cfe78..80cb03bf 100755 --- a/server/voice_changer/VoiceChanger.py +++ b/server/voice_changer/VoiceChanger.py @@ -495,7 +495,7 @@ class VoiceChanger: result = output_wav else: - print("[Voice Changer] no sola buffer. (You can ignore this.)") + print("[Voice Changer] warming up... generating sola buffer.") result = np.zeros(4096).astype(np.int16) if ( @@ -562,11 +562,11 @@ class VoiceChanger: except ONNXInputArgumentException as e: print("[Voice Changer] [Exception] onnx are waiting valid input.", e) return np.zeros(1).astype(np.int16), [0, 0, 0] - except HalfPrecisionChangingException as e: - print("[Voice Changer] Switching model configuration....", e) + except HalfPrecisionChangingException: + print("[Voice Changer] Switching model configuration....") return np.zeros(1).astype(np.int16), [0, 0, 0] - except NotEnoughDataExtimateF0 as e: - print("[Voice Changer] not enough data", e) + except NotEnoughDataExtimateF0: + print("[Voice Changer] warming up... waiting more data.") return np.zeros(1).astype(np.int16), [0, 0, 0] except DeviceChangingException as e: print("[Voice Changer] embedder:", e)