add flask

This commit is contained in:
wataru 2022-08-31 14:15:33 +09:00
parent a6e5d51597
commit 6100f2486d
8 changed files with 159 additions and 8 deletions

BIN
demo/dummy.wav Executable file

Binary file not shown.

136
demo/serverFlask.py Executable file
View File

@ -0,0 +1,136 @@
from flask import Flask, request, Markup, abort, jsonify
from flask_cors import CORS
import logging
from logging.config import dictConfig
import sys
import base64
import torch
import numpy as np
from scipy.io.wavfile import write, read
from datetime import datetime
import traceback
import struct
sys.path.append("mod")
sys.path.append("mod/text")
import utils
from data_utils import TextAudioSpeakerLoader, TextAudioSpeakerCollate
from models import SynthesizerTrn
from text.symbols import symbols
# Process-wide logging setup: the root logger emits INFO and above through a
# single stream handler bound to Flask's WSGI error stream.
dictConfig(
    {
        "version": 1,
        "formatters": {
            "default": {
                "format": "[%(asctime)s] %(levelname)s in %(module)s: %(message)s",
            },
        },
        "handlers": {
            "wsgi": {
                "class": "logging.StreamHandler",
                "stream": "ext://flask.logging.wsgi_errors_stream",
                "formatter": "default",
            },
        },
        "root": {
            "level": "INFO",
            "handlers": ["wsgi"],
        },
    }
)
# The Flask application also serves the built frontend: files under the dist
# folder are exposed from the site root.
app = Flask(
    __name__,
    static_folder="/voice-changer-internal/frontend/dist",
    static_url_path="/",
)
# Cross-origin requests are accepted from any origin on every route.
CORS(
    app,
    resources={
        r"/*": {"origins": "*"},
    },
)
class VoiceChanger():
    """Voice-conversion service built around a ``SynthesizerTrn`` model."""

    def __init__(self, config, model):
        """Build the synthesizer and load its weights.

        Args:
            config: Value handed to ``utils.get_hparams_from_file`` to obtain
                the hyper-parameters.
            model: Value handed to ``utils.load_checkpoint`` to populate the
                network weights.
        """
        self.hps = utils.get_hparams_from_file(config)
        self.net_g = SynthesizerTrn(
            len(symbols),
            self.hps.data.filter_length // 2 + 1,
            self.hps.train.segment_size // self.hps.data.hop_length,
            n_speakers=self.hps.data.n_speakers,
            **self.hps.model)
        self.net_g.eval()
        self.gpu_num = torch.cuda.device_count()
        print("GPU_NUM:", self.gpu_num)
        utils.load_checkpoint(model, self.net_g, None)

    @staticmethod
    def _decode_pcm16(wav):
        """Decode little-endian signed 16-bit PCM bytes into a sample array.

        A trailing odd byte (an incomplete sample) is ignored instead of
        raising. The result is widened to int64, matching the dtype the
        previous tuple-based decoding produced on 64-bit Linux.
        """
        sample_count = len(wav) // 2
        return np.frombuffer(wav, dtype="<i2", count=sample_count).astype(np.int64)

    def on_request(self, gpu, srcId, dstId, timestamp, wav):
        """Convert one chunk of audio from speaker ``srcId`` to ``dstId``.

        Args:
            gpu: CUDA device index. A negative value, or a host without any
                CUDA device, selects the CPU.
            srcId: Source speaker id.
            dstId: Target speaker id.
            timestamp: Not used by the conversion; accepted for the caller.
            wav: Little-endian signed 16-bit PCM bytes, or the integer ``0``
                to run the conversion on ``dummy.wav`` instead.

        Returns:
            A numpy int16 array holding the converted samples.

        Raises:
            Exception: Any failure during conversion is logged and re-raised
                so the caller sees the real cause.
        """
        if isinstance(wav, int) and wav == 0:
            _samplerate, unpackedData = read("dummy.wav")
        else:
            unpackedData = self._decode_pcm16(wav)
            # Debug copy of the most recently received chunk.
            write("logs/received_data.wav", 24000, unpackedData.astype(np.int16))

        try:
            use_cpu = gpu < 0 or self.gpu_num == 0
            device = torch.device("cpu") if use_cpu else torch.device("cuda", gpu)
            with torch.no_grad():
                dataset = TextAudioSpeakerLoader("dummy.txt", self.hps.data, no_use_textfile=True)
                pair = dataset.get_audio_text_speaker_pair([unpackedData, srcId, "a"])
                batch = TextAudioSpeakerCollate()([pair])
                # Only the spectrogram, its lengths and the source speaker id
                # are needed for conversion.
                _, _, spec, spec_lengths, _, _, sid_src = [t.to(device) for t in batch]
                sid_tgt1 = torch.LongTensor([dstId]).to(device)
                converted = self.net_g.to(device).voice_conversion(
                    spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data
                audio1 = (converted * self.hps.data.max_wav_value).cpu().float().numpy()
        except Exception as e:
            print("VC PROCESSING!!!! EXCEPTION!!!", e)
            print(traceback.format_exc())
            # Re-raise: there is no audio to return, and falling through used
            # to hide the real error behind an UnboundLocalError.
            raise
        return audio1.astype(np.int16)
@app.route('/test', methods=['GET', 'POST'])
def test():
    """Voice-conversion endpoint.

    GET echoes the ``query`` parameter back, HTML-escaped.

    POST expects a JSON body with ``gpu``, ``srcId``, ``dstId``, ``timestamp``
    (integers) and ``buffer`` (base64 audio bytes). It replies with JSON that
    repeats those four integers and adds ``changedVoiceBase64``, the converted
    audio encoded as base64.

    Errors are reported with a real status code: 400 when a field is missing
    or malformed, 500 when the conversion itself fails. The response body is
    the HTML-escaped exception text.
    """
    if request.method == 'GET':
        # The value is attacker-controlled and is served as text/html, so it
        # must be escaped before being echoed.
        return Markup.escape(request.args.get('query', ''))
    if request.method != 'POST':
        # Not reachable with the registered methods; kept as a guard.
        return abort(400)

    print("POST REQUEST PROCESSING....")
    # Read outside any try block so Flask's own HTTP errors for a missing or
    # malformed JSON body reach the client with their proper status code.
    payload = request.json

    try:
        gpu = int(payload['gpu'])
        srcId = int(payload['srcId'])
        dstId = int(payload['dstId'])
        timestamp = int(payload['timestamp'])
        wav = base64.b64decode(payload['buffer'])
    except (KeyError, TypeError, ValueError) as e:
        print("REQUEST PROCESSING!!!! EXCEPTION!!!", e)
        print(traceback.format_exc())
        return Markup.escape(str(e)), 400

    try:
        changedVoice = voiceChanger.on_request(gpu, srcId, dstId, timestamp, wav)
        changedVoiceBase64 = base64.b64encode(changedVoice).decode('utf-8')
    except Exception as e:
        print("REQUEST PROCESSING!!!! EXCEPTION!!!", e)
        print(traceback.format_exc())
        return Markup.escape(str(e)), 500

    return jsonify({
        "gpu": gpu,
        "srcId": srcId,
        "dstId": dstId,
        "timestamp": timestamp,
        "changedVoiceBase64": changedVoiceBase64,
    })
if __name__ == '__main__':
    # Usage: serverFlask.py PORT CONFIG MODEL
    args = sys.argv
    if len(args) < 4:
        print(f"usage: {args[0]} PORT CONFIG MODEL", file=sys.stderr)
        sys.exit(1)
    PORT = int(args[1])
    CONFIG = args[2]
    MODEL = args[3]
    app.logger.info('INITIALIZE MODEL')
    # Must remain a module-level name: the /test route looks it up globally.
    voiceChanger = VoiceChanger(CONFIG, MODEL)
    # Warm-up conversion on the dummy input (wav=0) before serving requests.
    voiceChanger.on_request(0, 0, 0, 0, 0)
    app.logger.info('START APP')
    # Debug mode is no longer forced on: it exposes the interactive Werkzeug
    # debugger on every interface, and its reloader re-runs this script so the
    # model is loaded twice. Set FLASK_DEBUG=1 in the environment to get it
    # back for local development.
    app.run(host='0.0.0.0', port=PORT)

View File

@ -17,7 +17,7 @@ from text.symbols import symbols
class MyCustomNamespace(socketio.Namespace): # 名前空間を設定するクラス
class MyCustomNamespace(socketio.Namespace):
def __init__(self, namespace, config, model):
super().__init__(namespace)
self.hps =utils.get_hparams_from_file(config)
@ -36,7 +36,7 @@ class MyCustomNamespace(socketio.Namespace): # 名前空間を設定するクラ
print('[{}] connet sid : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S') , sid))
# print('[{}] connet env : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S') , environ))
def on_request_message(self, sid, msg): # 送信してきたクライアントだけにメッセージを送る関数
def on_request_message(self, sid, msg):
# print("MESSGaa", msg)
gpu = int(msg[0])
srcId = int(msg[1])
@ -88,9 +88,9 @@ if __name__ == '__main__':
print(f"start... PORT:{PORT}, CONFIG:{CONFIG}, MODEL:{MODEL}")
# sio = socketio.Server(cors_allowed_origins='http://localhost:8080')
sio = socketio.Server(cors_allowed_origins='*')
sio.register_namespace(MyCustomNamespace('/test', CONFIG, MODEL)) # 名前空間を設定
sio.register_namespace(MyCustomNamespace('/test', CONFIG, MODEL))
app = socketio.WSGIApp(sio,static_files={
'': '../frontend/dist',
}) # wsgiサーバーミドルウェア生成
eventlet.wsgi.server(eventlet.listen(('0.0.0.0',int(PORT))), app) # wsgiサーバー起動
})
eventlet.wsgi.server(eventlet.listen(('0.0.0.0',int(PORT))), app)

14
demo/setupFlask.sh Executable file
View File

@ -0,0 +1,14 @@
#!/bin/bash
# Prepare the working directory and start the Flask voice-changer server.
# Usage: setupFlask.sh CONFIG MODEL
echo "config: $1"
echo "model: $2"

# Copy everything under /resources into the current directory.
cp -r /resources/* .

# If a setting.json is present here, copy it into the frontend assets.
if [[ -e ./setting.json ]]; then
    cp ./setting.json ../frontend/dist/assets/setting.json
fi

pip install flask
pip install flask_cors

# Arguments are quoted so paths containing spaces or glob characters reach
# the server unchanged.
python3 serverFlask.py 8080 "$1" "$2"

View File

@ -1,5 +1,6 @@
{
"app_title": "voice-changer",
"majar_mode": "docker",
"voice_changer_server_url": "http://localhost:8080/test",
"sample_rate": 48000,
"buffer_size": 1024,

File diff suppressed because one or more lines are too long

View File

@ -2,7 +2,7 @@
# 参考:https://programwiz.org/2022/03/22/how-to-write-shell-script-for-option-parsing/
DOCKER_IMAGE=dannadori/voice-changer:20220829_195844
DOCKER_IMAGE=dannadori/voice-changer:20220831_141509
TENSORBOARD_PORT=6006
VOICE_CHANGER_PORT=8080

View File

@ -1,4 +1,4 @@
FROM dannadori/voice-changer-internal:20220829_195653 as front
FROM dannadori/voice-changer-internal:20220831_140605 as front
FROM debian:bullseye-slim as base
ARG DEBIAN_FRONTEND=noninteractive