From c118572aae8b5bfa0e2b9fd99a4efe8da04705e8 Mon Sep 17 00:00:00 2001 From: w-okada <48346627+w-okada@users.noreply.github.com> Date: Thu, 12 Jan 2023 18:33:47 +0900 Subject: [PATCH] =?UTF-8?q?Colaboratory=20=E3=82=92=E4=BD=BF=E7=94=A8?= =?UTF-8?q?=E3=81=97=E3=81=A6=E4=BD=9C=E6=88=90=E3=81=97=E3=81=BE=E3=81=97?= =?UTF-8?q?=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- VoiceChangerDemo.ipynb | 230 +++++++++++++++-------------------------- 1 file changed, 82 insertions(+), 148 deletions(-) diff --git a/VoiceChangerDemo.ipynb b/VoiceChangerDemo.ipynb index 48d744aa..8c34783a 100644 --- a/VoiceChangerDemo.ipynb +++ b/VoiceChangerDemo.ipynb @@ -5,7 +5,7 @@ "colab": { "name": "VoiceChangerDemo", "provenance": [], - "authorship_tag": "ABX9TyMYxiOpYpg7Jdcqs6o5zO6u", + "authorship_tag": "ABX9TyM3qbRFKy3uo9w3dqoPIKD4", "include_colab_link": true }, "kernelspec": { @@ -74,7 +74,7 @@ "base_uri": "https://localhost:8080/" }, "id": "vV1t7PBRm-o6", - "outputId": "58e3f73b-3a8f-417a-b76c-c650f94669a7" + "outputId": "a7e63d28-9c42-41e3-c2fa-3ca4e48a4a26" }, "execution_count": 1, "outputs": [ @@ -82,7 +82,7 @@ "output_type": "stream", "name": "stdout", "text": [ - "Sat Dec 10 06:05:51 2022 \n", + "Thu Jan 12 09:17:56 2023 \n", "+-----------------------------------------------------------------------------+\n", "| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |\n", "|-------------------------------+----------------------+----------------------+\n", @@ -91,7 +91,7 @@ "| | | MIG M. |\n", "|===============================+======================+======================|\n", "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", - "| N/A 47C P0 26W / 70W | 0MiB / 15109MiB | 0% Default |\n", + "| N/A 35C P8 9W / 70W | 0MiB / 15109MiB | 0% Default |\n", "| | | N/A |\n", "+-------------------------------+----------------------+----------------------+\n", " \n", @@ -123,13 +123,19 @@ "cell_type": "code", "source": [ "# (2-1) 使用するモデルとコンフィグファイルの指定\n", + "if \"MODEL\" in locals():\n", + " del MODEL\n", + "if \"ONNX\" in locals():\n", + " del ONNX\n", + "\n", "CONFIG=\"/content/drive/MyDrive/VoiceChanger/config.json\"\n", - "MODEL=\"/content/drive/MyDrive/VoiceChanger/G_326000.pth\"" + "#MODEL=\"/content/drive/MyDrive/VoiceChanger/G_326000.pth\"\n", + "ONNX=\"/content/drive/MyDrive/VoiceChanger/G_326000.onnx\"" ], "metadata": { "id": "nSXATMWYb4Ik" }, - "execution_count": 2, + "execution_count": 19, "outputs": [] }, { @@ -140,7 +146,7 @@ "base_uri": "https://localhost:8080/" }, "id": "2wxD-gRSMU5R", - "outputId": "17c5e430-da38-41b4-b59d-032e0a6639b9" + "outputId": "b61f5348-e0e7-436f-cd5d-7e507759e11c" }, "outputs": [ { @@ -171,18 +177,17 @@ "cell_type": "code", "source": [ "# (3) リポジトリのクローン\n", - "!git clone --depth 1 https://github.com/w-okada/voice-changer.git -b ver_1.2\n", - "%cd voice-changer/demo/MMVC_Trainer/monotonic_align \n", - "!cythonize -3 -i core.pyx &> /dev/null\n", - "!mv core.cpython-*.so monotonic_align &> /dev/null\n", - "%cd /content/voice-changer/demo/\n" + "!git clone --depth 1 https://github.com/w-okada/voice-changer.git -b v.1.3.2\n", + "%cd voice-changer/server\n", + "!git clone --depth 1 https://github.com/isletennos/MMVC_Trainer.git -b v1.3.2.2\n", + "!cd MMVC_Trainer/monotonic_align/ && python setup.py build_ext --inplace && cd -" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "86wTFmqsNMnD", - "outputId": "aa92d88d-4bd6-4931-f801-79a32b7f6abe" + "outputId": "a7c679fa-b792-49ef-d656-60f27c63e9e9" }, "execution_count": 4, "outputs": [ @@ -191,13 +196,21 @@ "name": "stdout", "text": [ "Cloning into 'voice-changer'...\n", - "remote: Enumerating objects: 1030, done.\u001b[K\n", - "remote: Counting objects: 100% (1030/1030), done.\u001b[K\n", - "remote: Compressing objects: 100% (919/919), done.\u001b[K\n", - "remote: Total 1030 (delta 21), reused 982 (delta 11), pack-reused 0\u001b[K\n", - "Receiving objects: 100% (1030/1030), 71.87 MiB | 22.15 MiB/s, done.\n", + "remote: Enumerating objects: 143, done.\u001b[K\n", + "remote: Counting objects: 100% (143/143), done.\u001b[K\n", + "remote: Compressing objects: 100% (128/128), done.\u001b[K\n", + "remote: Total 143 (delta 21), reused 57 (delta 5), pack-reused 0\u001b[K\n", + "Receiving objects: 100% (143/143), 1.51 MiB | 2.50 MiB/s, done.\n", "Resolving deltas: 100% (21/21), done.\n", - "Note: checking out 'e824f89e8fac9d202d5ff72ebb13a4e1bf57b4d3'.\n", + "/content/voice-changer/server\n", + "Cloning into 'MMVC_Trainer'...\n", + "remote: Enumerating objects: 920, done.\u001b[K\n", + "remote: Counting objects: 100% (920/920), done.\u001b[K\n", + "remote: Compressing objects: 100% (830/830), done.\u001b[K\n", + "remote: Total 920 (delta 4), reused 893 (delta 1), pack-reused 0\u001b[K\n", + "Receiving objects: 100% (920/920), 53.04 MiB | 17.53 MiB/s, done.\n", + "Resolving deltas: 100% (4/4), done.\n", + "Note: checking out 'f17c8c57d1ab7512633e6c57521f1eef6851bd0e'.\n", "\n", "You are in 'detached HEAD' state. You can look around, make experimental\n", "changes and commit them, and you can discard any commits you make in this\n", @@ -208,118 +221,17 @@ "\n", " git checkout -b \n", "\n", - "/content/voice-changer/demo/MMVC_Trainer/monotonic_align\n", - "/content/voice-changer/demo\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# ファイルの配置\n", - "アプリケーションの挙動を記した設定ファイルをコピーします(4-1)。(4-2)はコピーした設定ファイルを表示しています。もしかしたらうまく動かないときに役立つかもしれません。" - ], - "metadata": { - "id": "jmDY8W_fnuSi" - } - }, - { - "cell_type": "code", - "source": [ - "# (4-1) 設定ファイルの配置\n", - "!cp ../template/setting_mmvc_colab.json ../frontend/dist/assets/setting.json\n" - ], - "metadata": { - "id": "Bn4kV8TgXp8i" - }, - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# (4-2) 設定ファイルの確認\n", - "!cat ../frontend/dist/assets/setting.json\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "pjxPsOOaXXTj", - "outputId": "2c418c42-235e-4d7b-db62-858c226a2478" - }, - "execution_count": 6, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "{\n", - " \"app_title\": \"voice-changer\",\n", - " \"majar_mode\": \"colab\",\n", - " \"voice_changer_server_url\": \"/test\",\n", - " \"sample_rate\": 48000,\n", - " \"buffer_size\": 1024,\n", - " \"prefix_chunk_size\": 48,\n", - " \"chunk_size\": 48,\n", - " \"speakers\": [\n", - " {\n", - " \"id\": 100,\n", - " \"name\": \"ずんだもん\"\n", - " },\n", - " {\n", - " \"id\": 107,\n", - " \"name\": \"user\"\n", - " },\n", - " {\n", - " \"id\": 101,\n", - " \"name\": \"そら\"\n", - " },\n", - " {\n", - " \"id\": 102,\n", - " \"name\": \"めたん\"\n", - " },\n", - " {\n", - " \"id\": 103,\n", - " \"name\": \"つむぎ\"\n", - " }\n", - " ],\n", - " \"src_id\": 107,\n", - " \"dst_id\": 100,\n", - " \"vf_enable\": true,\n", - " \"voice_changer_mode\": \"realtime\",\n", - " \"gpu\": 0,\n", - " \"available_gpus\": [-1, 0, 1, 2, 3, 4],\n", - " \"screen\": {\n", - " \"enable_screen\": true,\n", - " \"backgournd_image_url\": \"./assets/images/bg_natural_sougen.jpg\"\n", - " },\n", - " \"avatar\": {\n", - " \"enable_avatar\": false,\n", - " \"motion_capture_face\": false,\n", - " \"motion_capture_upperbody\": false,\n", - " \"lip_overwrite_with_voice\": false,\n", - " \"avatar_url\": \"./assets/vrm/zundamon/zundamon.vrm\",\n", - " \"background_color\": \"#0000dd\",\n", - " \"chroma_key\": \"#0000dd\",\n", - " \"avatar_canvas_size\": [1280, 720],\n", - " \"screen_canvas_size\": [1280, 720]\n", - " },\n", - " \"advance\": {\n", - " \"avatar_draw_skip_rate\": 3,\n", - " \"screen_draw_skip_rate\": 3,\n", - " \"visualizer_draw_skip_rate\": 3,\n", - " \"cross_fade_lower_value\": 0.1,\n", - " \"cross_fade_offset_rate\": 0.3,\n", - " \"cross_fade_end_rate\": 0.6,\n", - " \"cross_fade_type\": 2\n", - " },\n", - " \"transcribe\": {\n", - " \"lang\": \"日本語(ja-JP)\",\n", - " \"expire_time\": 5\n", - " }\n", - "}\n" + "Compiling core.pyx because it changed.\n", + "[1/1] Cythonizing core.pyx\n", + "/usr/local/lib/python3.8/dist-packages/Cython/Compiler/Main.py:369: FutureWarning: Cython directive 'language_level' not set, using 2 for now (Py2). This will change in a later release! File: /content/voice-changer/server/MMVC_Trainer/monotonic_align/core.pyx\n", + " tree = Parsing.p_module(s, pxd, full_module_name)\n", + "running build_ext\n", + "building 'monotonic_align.core' extension\n", + "creating build\n", + "creating build/temp.linux-x86_64-3.8\n", + "x86_64-linux-gnu-gcc -pthread -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/local/lib/python3.8/dist-packages/numpy/core/include -I/usr/include/python3.8 -c core.c -o build/temp.linux-x86_64-3.8/core.o\n", + "x86_64-linux-gnu-gcc -pthread -shared -Wl,-O1 -Wl,-Bsymbolic-functions -Wl,-Bsymbolic-functions -g -fwrapv -O2 -Wl,-Bsymbolic-functions -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 build/temp.linux-x86_64-3.8/core.o -o /content/voice-changer/server/MMVC_Trainer/monotonic_align/monotonic_align/core.cpython-38-x86_64-linux-gnu.so\n", + "/content/voice-changer/server\n" ] } ] @@ -348,12 +260,13 @@ "!pip install python-multipart &> /dev/null\n", "!pip install uvicorn &> /dev/null\n", "!pip install websockets &> /dev/null\n", - "!pip install pyOpenSSL &> /dev/null" + "!pip install pyOpenSSL &> /dev/null\n", + "!pip install onnxruntime-gpu &> /dev/null" ], "metadata": { "id": "LwZAAuqxX7yY" }, - "execution_count": 7, + "execution_count": 5, "outputs": [] }, { @@ -374,12 +287,13 @@ "**`INFO:root:Loaded checkpoint ...`**\n", "\n", "```\n", + " Phase name:__main__\n", + " PHASE3:__main__\n", + "VoiceChanger Initialized (GPU_NUM:1, mps_enabled:False)\n", "INFO:root:Loaded checkpoint '/content/drive/MyDrive/VoiceChanger/G_326000.pth' (iteration 1136)\n", - "VoiceChanger Initialized (GPU_NUM:1)\n", " PHASE1:__main__\n", "Start MMVC SocketIO Server\n", - " CONFIG:/content/drive/MyDrive/VoiceChanger/config.json, MODEL:/content/drive/MyDrive/VoiceChanger/G_326000.pth\n", - "DEBUG:asyncio:Using selector: EpollSelector\n", + " CONFIG:/content/drive/MyDrive/VoiceChanger/config.json, MODEL:/content/drive/MyDrive/VoiceChanger/G_326000.pth ONNX_MODEL:None\n", " Phase name:MMVCServerSIO\n", " PHASE3:MMVCServerSIO\n", "INFO:root:Loaded checkpoint '/content/drive/MyDrive/VoiceChanger/G_326000.pth' (iteration 1136)\n", @@ -398,14 +312,35 @@ "PORT = 10000 + random.randint(1, 9999)\n", "LOG_FILE = f\"LOG_FILE_{PORT}\"\n", "\n", - "get_ipython().system_raw(f'python3 MMVCServerSIO.py -t MMVC -p {PORT} -c {CONFIG} -m {MODEL} --colab True >{LOG_FILE} 2>&1 &')\n", + "if \"MODEL\" in locals() and \"ONNX\" in locals():\n", + " model_param = f\" -m {MODEL} -o {ONNX}\"\n", + "elif \"MODEL\" in locals():\n", + " model_param = f\" -m {MODEL}\"\n", + "elif \"ONNX\" in locals():\n", + " model_param = f\" -o {ONNX}\"\n", + "else:\n", + " model_param = f\"\"\n", + "\n", + "get_ipython().system_raw(f'python3 MMVCServerSIO.py -t MMVC -p {PORT} -c {CONFIG} {model_param} --colab True >{LOG_FILE} 2>&1 &')\n", "#print(f\"PORT:{PORT}, LOG_FILE:{LOG_FILE}\")" ], "metadata": { - "id": "iNOAB7zISI6J" + "id": "iNOAB7zISI6J", + "outputId": "99cdd34b-cc29-4b5f-c292-151c81caa721", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "execution_count": 8, - "outputs": [] + "execution_count": 20, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "python3 MMVCServerSIO.py -t MMVC -p 16400 -c /content/drive/MyDrive/VoiceChanger/config.json -m /content/drive/MyDrive/VoiceChanger/G_326000.pth -o /content/drive/MyDrive/VoiceChanger/G_326000.onnx --colab True >LOG_FILE_16400 2>&1 &\n" + ] + } + ] }, { "cell_type": "code", @@ -418,9 +353,9 @@ "base_uri": "https://localhost:8080/" }, "id": "chu06KpAjEK6", - "outputId": "2b9883a7-b350-4958-af11-8d48ad833bbb" + "outputId": "99d8653d-5d4d-40bc-894f-60a43abf0442" }, - "execution_count": 10, + "execution_count": 13, "outputs": [ { "output_type": "stream", @@ -428,12 +363,11 @@ "text": [ "\u001b[32m Phase name:__main__\u001b[0m\n", "\u001b[32m PHASE3:__main__\u001b[0m\n", + "VoiceChanger Initialized (GPU_NUM:1, mps_enabled:False)\n", "INFO:root:Loaded checkpoint '/content/drive/MyDrive/VoiceChanger/G_326000.pth' (iteration 1136)\n", - "VoiceChanger Initialized (GPU_NUM:1)\n", "\u001b[32m PHASE1:__main__\u001b[0m\n", "\u001b[17mStart MMVC SocketIO Server\u001b[0m\n", - "\u001b[34m CONFIG:/content/drive/MyDrive/VoiceChanger/config.json, MODEL:/content/drive/MyDrive/VoiceChanger/G_326000.pth\u001b[0m\n", - "DEBUG:asyncio:Using selector: EpollSelector\n", + "\u001b[34m CONFIG:/content/drive/MyDrive/VoiceChanger/config.json, MODEL:/content/drive/MyDrive/VoiceChanger/G_326000.pth ONNX_MODEL:None\u001b[0m\n", "\u001b[32m Phase name:MMVCServerSIO\u001b[0m\n", "\u001b[32m PHASE3:MMVCServerSIO\u001b[0m\n", "INFO:root:Loaded checkpoint '/content/drive/MyDrive/VoiceChanger/G_326000.pth' (iteration 1136)\n" @@ -469,15 +403,15 @@ "base_uri": "https://localhost:8080/", "height": 34 }, - "outputId": "2cc7ba41-08d9-4582-ad15-959c741597a0" + "outputId": "2655ef55-c474-4609-e1b5-6a316dfd4a2c" }, - "execution_count": 11, + "execution_count": 14, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "https://5qu91u3piw-496ff2e9c6d22116-11612-colab.googleusercontent.com/front/\n" + "https://q27c1y5n1t-496ff2e9c6d22116-13348-colab.googleusercontent.com/front/\n" ] } ]