{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"id": "5m_Xf_2NY6mI"
},
"outputs": [],
"source": [
"import torch, torchaudio\n",
"import requests\n",
"import IPython.display as display"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"id": "GGiC0rT2hoik",
"outputId": "6c8bd028-f83c-4a37-e7ad-344deb01dc2a",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: flask in /usr/local/lib/python3.7/dist-packages (1.1.4)\n",
"Requirement already satisfied: Werkzeug<2.0,>=0.15 in /usr/local/lib/python3.7/dist-packages (from flask) (1.0.1)\n",
"Requirement already satisfied: Jinja2<3.0,>=2.10.1 in /usr/local/lib/python3.7/dist-packages (from flask) (2.11.3)\n",
"Requirement already satisfied: itsdangerous<2.0,>=0.24 in /usr/local/lib/python3.7/dist-packages (from flask) (1.1.0)\n",
"Requirement already satisfied: click<8.0,>=5.1 in /usr/local/lib/python3.7/dist-packages (from flask) (7.1.2)\n",
"Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2<3.0,>=2.10.1->flask) (2.0.1)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: flask_cors in /usr/local/lib/python3.7/dist-packages (3.0.10)\n",
"Requirement already satisfied: Flask>=0.9 in /usr/local/lib/python3.7/dist-packages (from flask_cors) (1.1.4)\n",
"Requirement already satisfied: Six in /usr/local/lib/python3.7/dist-packages (from flask_cors) (1.15.0)\n",
"Requirement already satisfied: click<8.0,>=5.1 in /usr/local/lib/python3.7/dist-packages (from Flask>=0.9->flask_cors) (7.1.2)\n",
"Requirement already satisfied: Jinja2<3.0,>=2.10.1 in /usr/local/lib/python3.7/dist-packages (from Flask>=0.9->flask_cors) (2.11.3)\n",
"Requirement already satisfied: Werkzeug<2.0,>=0.15 in /usr/local/lib/python3.7/dist-packages (from Flask>=0.9->flask_cors) (1.0.1)\n",
"Requirement already satisfied: itsdangerous<2.0,>=0.24 in /usr/local/lib/python3.7/dist-packages (from Flask>=0.9->flask_cors) (1.1.0)\n",
"Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2<3.0,>=2.10.1->Flask>=0.9->flask_cors) (2.0.1)\n"
]
}
],
"source": [
"#!apt-get install -y espeak libsndfile1-dev\n",
"!pip install flask\n",
"# !pip install python-socketio\n",
"# !pip install eventlet\n",
"# !pip install unidecode\n",
"# !pip install phonemizer\n",
"# !pip install retry\n",
"!pip install flask_cors"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"id": "WO8XzrFMZGoj"
},
"outputs": [],
"source": [
"# hubert = torch.hub.load(\"bshall/hubert:main\", \"hubert_soft\").cuda()\n",
"# acoustic = torch.hub.load(\"bshall/acoustic-model:main\", \"hubert_soft\").cuda()\n",
"# hifigan = torch.hub.load(\"bshall/hifigan:main\", \"hifigan_hubert_soft\").cuda()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"id": "v7xM7CnEZMTL"
},
"outputs": [],
"source": [
"# with open(\"example.wav\", \"wb\") as file:\n",
"# response = requests.get(\"https://drive.google.com/uc?export=preview&id=1Y3KuPAhB5VcsmIaokBVKu3LUEZOfhSu8\")\n",
"# file.write(response.content)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"id": "UZox3YDVZOya"
},
"outputs": [],
"source": [
"# source, sr = torchaudio.load(\"emotion059.wav\")\n",
"# source = torchaudio.functional.resample(source, sr, 16000)\n",
"# source = source.unsqueeze(0).cuda()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "dEZ9_zCKnXpZ"
},
"source": []
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"id": "DSAA2CMfZY7C"
},
"outputs": [],
"source": [
"# with torch.inference_mode():\n",
"# # Extract speech units\n",
"# units = hubert.units(source)\n",
"# # Generate target spectrogram\n",
"# mel = acoustic.generate(units).transpose(1, 2)\n",
"# # Generate audio waveform\n",
"# target = hifigan(mel)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"id": "VCwjKdIUZZoi"
},
"outputs": [],
"source": [
"# display.Audio(target.squeeze().cpu(), rate=16000)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"id": "vjJs35ifZbSK"
},
"outputs": [],
"source": [
"# data = target.squeeze().cpu()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"id": "0SuFlButeKXG"
},
"outputs": [],
"source": [
"# display.Audio(data, rate=16000)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"id": "vFwF5Uh0eMLV"
},
"outputs": [],
"source": [
"# dest = torchaudio.functional.resample(target, 16000,24000)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"id": "eIWedhF6ebuV"
},
"outputs": [],
"source": [
"# display.Audio(dest.squeeze().cpu(), rate=24000)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"id": "XkCO-j9teccu"
},
"outputs": [],
"source": [
"# dest"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "lzo_ZWmAjaby",
"outputId": "85fb1cbe-0a09-437a-fe97-056e4360161b"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"fatal: destination path 'voice-changer' already exists and is not an empty directory.\n",
"\u001b[0m\u001b[01;34massets\u001b[0m/ \u001b[01;32mfavicon.ico\u001b[0m* \u001b[01;32mindex.js\u001b[0m*\n",
"\u001b[01;32mcoffee.png\u001b[0m* \u001b[01;32mindex.html\u001b[0m* \u001b[01;32mindex.js.LICENSE.txt\u001b[0m*\n"
]
}
],
"source": [
"# (3) リポジトリのクローン\n",
"!git clone https://github.com/w-okada/voice-changer.git\n",
"%ls voice-changer/frontend/dist\n"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"id": "8-z9j4e_j-Wb"
},
"outputs": [],
"source": [
"# (4-1) Put the settings file in place\n",
"!cp voice-changer/template/setting_colab.json voice-changer/frontend/dist/assets/setting.json\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"id": "-iPiSzvAepCl"
},
"outputs": [],
"source": [
"# (6-1) サーバの起動\n",
"PORT=8087\n",
"get_ipython().system_raw(f'python3 serverFlask.py {PORT} >foo 2>&1 &')"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "IiWSwDjQidc7",
"outputId": "d8b15e64-efbf-411a-bd76-f51e5557e077"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Using cache found in /root/.cache/torch/hub/bshall_hubert_main\n",
"Using cache found in /root/.cache/torch/hub/bshall_acoustic-model_main\n",
"Using cache found in /root/.cache/torch/hub/bshall_hifigan_main\n",
"[2022-09-15 08:22:01,380] INFO in serverFlask: INITIALIZE MODEL\n",
"[2022-09-15 08:22:01,380] INFO in serverFlask: START APP\n",
"Removing weight norm...\n",
" * Serving Flask app \"serverFlask\" (lazy loading)\n",
" * Environment: production\n",
" WARNING: This is a development server. Do not use it in a production deployment.\n",
" Use a production WSGI server instead.\n",
" * Debug mode: on\n",
"Traceback (most recent call last):\n",
" File \"serverFlask.py\", line 108, in \n",
" app.run(debug=True, host='0.0.0.0',port=PORT)\n",
" File \"/usr/local/lib/python3.7/dist-packages/flask/app.py\", line 990, in run\n",
" run_simple(host, port, self, **options)\n",
" File \"/usr/local/lib/python3.7/dist-packages/werkzeug/serving.py\", line 1030, in run_simple\n",
" s.bind(server_address)\n",
"OSError: [Errno 98] Address already in use\n"
]
}
],
"source": [
"# (6-2) Check that the server has started (run with Ctrl+Enter)\n",
"!cat foo"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"id": "WWn3HJlpin4R",
"outputId": "00f58001-04bf-44dc-aa8e-a2c42074e00d"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
""
],
"application/javascript": [
"(async (port, path, text, element) => {\n",
" if (!google.colab.kernel.accessAllowed) {\n",
" return;\n",
" }\n",
" element.appendChild(document.createTextNode(''));\n",
" const url = await google.colab.kernel.proxyPort(port);\n",
" const anchor = document.createElement('a');\n",
" anchor.href = new URL(path, url).toString();\n",
" anchor.target = '_blank';\n",
" anchor.setAttribute('data-href', url + path);\n",
" anchor.textContent = text;\n",
" element.appendChild(anchor);\n",
" })(8087, \"/\", \"https://localhost:8087/\", window.element)"
]
},
"metadata": {}
}
],
"source": [
"# (7) Start the proxy\n",
"from google.colab import output\n",
"\n",
"output.serve_kernel_port_as_window(PORT)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"id": "k9OqZ-hLjKIx"
},
"outputs": [],
"source": [
"import librosa\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import torch, torchaudio\n",
"import IPython.display as display"
]
},
{
"cell_type": "code",
"source": [
"wave, sr = librosa.load(\"received_data.wav\", sr=24000)\n",
"rms = librosa.feature.rms(y=wave)\n",
"times = librosa.times_like(rms, sr=sr)\n",
"plt.plot(times, rms[0]*2**(1/2))\n",
"volume_db = 20 * np.log10(wave) \n"
],
"metadata": {
"id": "fQL4SEwaCidb",
"outputId": "6c91ef39-d020-4922-a735-5d4f7e3bef29",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 334
}
},
"execution_count": 48,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:5: RuntimeWarning: divide by zero encountered in log10\n",
" \"\"\"\n",
"/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:5: RuntimeWarning: invalid value encountered in log10\n",
" \"\"\"\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"