Mirror of https://github.com/w-okada/voice-changer.git (synced 2025-02-02 16:23:58 +03:00)
server resampling added

commit 15686caf50 (parent c6309b83f8)
client/demo/dist/index.js (vendored, 4 lines changed)
File diff suppressed because one or more lines are too long
@@ -1,6 +1,5 @@
-import { BufferSize, DownSamplingMode, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
-import React, { useMemo, useState } from "react"
-import { ClientState } from "@dannadori/voice-changer-client-js";
+import { BufferSize, DownSamplingMode, InputSampleRate, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
+import React, { useMemo } from "react"
 import { useAppState } from "./001_provider/001_AppStateProvider";
 import { AnimationTypes, HeaderButton, HeaderButtonProps } from "./components/101_HeaderButton";
 
@@ -83,6 +82,27 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
         )
     }, [appState.clientSetting.setting.sampleRate, appState.clientSetting.setSampleRate])
 
+    const sendingSampleRateRow = useMemo(() => {
+        return (
+            <div className="body-row split-3-7 left-padding-1 guided">
+                <div className="body-item-title left-padding-1">Sending Sample Rate</div>
+                <div className="body-select-container">
+                    <select className="body-select" value={appState.clientSetting.setting.sendingSampleRate} onChange={(e) => {
+                        appState.clientSetting.setSendingSampleRate(Number(e.target.value) as InputSampleRate)
+                        appState.serverSetting.setInputSampleRate(Number(e.target.value) as InputSampleRate)
+
+                    }}>
+                        {
+                            Object.values(InputSampleRate).map(x => {
+                                return <option key={x} value={x}>{x}</option>
+                            })
+                        }
+                    </select>
+                </div>
+            </div>
+        )
+    }, [appState.clientSetting.setting.sendingSampleRate, appState.clientSetting.setSendingSampleRate, appState.serverSetting.setInputSampleRate])
+
     const bufferSizeRow = useMemo(() => {
         return (
 
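Note: the new Sending Sample Rate row drives two settings from a single selected value: the client's sendingSampleRate (the rate AudioStreamer downsamples the microphone signal to before sending) and the server's inputSampleRate (the rate the server expects to receive). Updating both in one onChange handler keeps the two ends from drifting apart.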
@@ -263,6 +283,7 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
                 {protocolRow}
                 <div className="body-row divider"></div>
                 {sampleRateRow}
+                {sendingSampleRateRow}
                 {bufferSizeRow}
                 <div className="body-row divider"></div>
 
@@ -280,7 +301,7 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
 
            </>
        )
-    }, [mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
+    }, [mmvcServerUrlRow, protocolRow, sampleRateRow, sendingSampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow])
 
 
     const advancedSetting = useMemo(() => {
client/lib/package-lock.json (generated, 1693 lines changed)
File diff suppressed because it is too large
@@ -27,7 +27,7 @@
     "license": "ISC",
     "devDependencies": {
         "@types/audioworklet": "^0.0.36",
-        "@types/node": "^18.13.0",
+        "@types/node": "^18.14.0",
         "@types/react": "18.0.28",
         "@types/react-dom": "18.0.11",
         "eslint": "^8.34.0",
@@ -47,9 +47,11 @@
     },
     "dependencies": {
-        "amazon-chime-sdk-js": "^3.10.0",
+        "@types/readable-stream": "^2.3.15",
+        "amazon-chime-sdk-js": "^3.11.0",
         "install": "^0.13.0",
         "localforage": "^1.10.0",
         "microphone-stream": "^6.0.1",
         "path-browserify": "^1.0.1",
         "react": "^18.2.0",
         "react-dom": "^18.2.0",
+        "readable-stream": "^4.3.0",
@@ -1,7 +1,8 @@
 import { io, Socket } from "socket.io-client";
 import { DefaultEventsMap } from "@socket.io/component-emitter";
 import { Duplex, DuplexOptions } from "readable-stream";
-import { DownSamplingMode, Protocol, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";
+import { DefaultVoiceChangerClientSetting, DownSamplingMode, Protocol, SendingSampleRate, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";
+
 
 export type Callbacks = {
     onVoiceReceived: (voiceChangerMode: VoiceChangerMode, data: ArrayBuffer) => void
@@ -38,7 +39,7 @@ export class AudioStreamer extends Duplex {
     // Flags
     // private downSamplingMode: DownSamplingMode = DownSamplingMode.decimate
     private downSamplingMode: DownSamplingMode = DownSamplingMode.average
-
+    private sendingSampleRate: number = DefaultVoiceChangerClientSetting.sendingSampleRate
 
     constructor(callbacks: Callbacks, audioStreamerListeners: AudioStreamerListeners, options?: DuplexOptions) {
         super(options);
@@ -97,6 +98,9 @@ export class AudioStreamer extends Duplex {
     setDownSamplingMode = (val: DownSamplingMode) => {
         this.downSamplingMode = val
     }
+    setSendingSampleRate = (val: SendingSampleRate) => {
+        this.sendingSampleRate = val
+    }
 
     getSettings = (): AudioStreamerSettings => {
         return {
@@ -156,9 +160,12 @@ export class AudioStreamer extends Duplex {
     }
 
 
-    private _write_realtime = (buffer: Float32Array) => {
+    private _write_realtime = async (buffer: Float32Array) => {
+
         let downsampledBuffer: Float32Array | null = null
-        if (this.downSamplingMode == DownSamplingMode.decimate) {
+        if (this.sendingSampleRate == 48000) {
+            downsampledBuffer = buffer
+        } else if (this.downSamplingMode == DownSamplingMode.decimate) {
             //////// (Kind 1) Decimation //////////
             // bufferSize samples (48kHz) come in.
             //// The input arrives at 48000Hz, so thin it out to 24000Hz.
@@ -170,7 +177,8 @@ export class AudioStreamer extends Duplex {
             }
         } else {
             //////// (Kind 2) Averaging //////////
-            downsampledBuffer = this._averageDownsampleBuffer(buffer, 48000, 24000)
+            // downsampledBuffer = this._averageDownsampleBuffer(buffer, 48000, 24000)
+            downsampledBuffer = this._averageDownsampleBuffer(buffer, 48000, this.sendingSampleRate)
         }
 
         // Float to signed16
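In averaging mode the streamer now downsamples to whatever sendingSampleRate is set, rather than a hard-coded 24000. As a rough sketch of what an averaging downsampler does (a hypothetical stand-in, not the repository's _averageDownsampleBuffer), assuming an integer rate ratio:

    const averageDownsample = (buffer: Float32Array, srcRate: number, dstRate: number): Float32Array => {
        if (srcRate === dstRate) return buffer
        const ratio = srcRate / dstRate                  // e.g. 48000 / 24000 = 2
        const outLength = Math.floor(buffer.length / ratio)
        const out = new Float32Array(outLength)
        for (let i = 0; i < outLength; i++) {
            // Average the input samples that map onto output sample i.
            const start = Math.floor(i * ratio)
            const end = Math.floor((i + 1) * ratio)
            let sum = 0
            for (let j = start; j < end; j++) sum += buffer[j]
            out[i] = sum / (end - start)
        }
        return out
    }

Averaging acts as a crude low-pass filter before the rate reduction, which is why it aliases less than plain decimation.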
@@ -184,7 +192,9 @@ export class AudioStreamer extends Duplex {
 
 
         // Manage the data in 256-byte chunks (the sample count decimated from the minimum buffer size of 256, x 2 bytes each)
-        const chunkByteSize = 256 // (const.ts ★1)
+        // const chunkByteSize = 256 // (const.ts ★1)
+        // const chunkByteSize = 256 * 2 // (const.ts ★1)
+        const chunkByteSize = (256 * 2) * (this.sendingSampleRate / 48000) // (const.ts ★1)
         for (let i = 0; i < arrayBuffer.byteLength / chunkByteSize; i++) {
             const ab = arrayBuffer.slice(i * chunkByteSize, (i + 1) * chunkByteSize)
             this.requestChunks.push(ab)
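The reworked chunkByteSize keeps each chunk covering a fixed span of input time whatever the sending rate. A quick check of the arithmetic (each Int16 sample is 2 bytes):

    const chunkBytes = (sendingSampleRate: number) => (256 * 2) * (sendingSampleRate / 48000)
    chunkBytes(48000)   // 512 bytes = 256 samples at 48 kHz
    chunkBytes(24000)   // 256 bytes = 128 samples at 24 kHz

Either way a chunk spans 256 / 48000 s of microphone audio, about 5.33 ms.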
@@ -3,7 +3,7 @@ import { VoiceChangerWorkletNode, VoiceChangerWorkletListener } from "./VoiceCha
 import workerjs from "raw-loader!../worklet/dist/index.js";
 import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
 import { createDummyMediaStream, validateUrl } from "./util";
-import { BufferSize, DefaultVoiceChangerClientSetting, DownSamplingMode, Protocol, ServerSettingKey, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION, WorkletSetting } from "./const";
+import { BufferSize, DefaultVoiceChangerClientSetting, DownSamplingMode, Protocol, SendingSampleRate, ServerSettingKey, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION, WorkletSetting } from "./const";
 import MicrophoneStream from "microphone-stream";
 import { AudioStreamer, Callbacks, AudioStreamerListeners } from "./AudioStreamer";
 import { ServerConfigurator } from "./ServerConfigurator";
@@ -255,6 +255,9 @@ export class VoiceChangerClient {
     setDownSamplingMode = (val: DownSamplingMode) => {
         this.audioStreamer.setDownSamplingMode(val)
     }
+    setSendingSampleRate = (val: SendingSampleRate) => {
+        this.audioStreamer.setSendingSampleRate(val)
+    }
 
     // configure worklet
     configureWorklet = (setting: WorkletSetting) => {
@@ -26,6 +26,7 @@ export type VoiceChangerServerSetting = {
     f0Detector: string // dio or harvest
     recordIO: number // 0:off, 1:on
     serverMicProps: string
+    inputSampleRate: InputSampleRate
 }
 
 export type VoiceChangerClientSetting = {
@@ -33,6 +34,7 @@ export type VoiceChangerClientSetting = {
     mmvcServerUrl: string,
     protocol: Protocol,
     sampleRate: SampleRate, // 48000Hz
+    sendingSampleRate: SendingSampleRate,
     bufferSize: BufferSize, // 256, 512, 1024, 2048, 4096, 8192, 16384 (for mic stream)
     inputChunkNum: number, // n of (256 x n) for send buffer
     speakers: Speaker[],
@@ -83,6 +85,7 @@ export type ServerInfo = {
     f0Detector: string
     recordIO: number
     serverMicProps: string
+    inputSampleRate: InputSampleRate
 }
 
 export type ServerAudioDevice = {
@@ -123,6 +126,18 @@ export const SampleRate = {
 } as const
 export type SampleRate = typeof SampleRate[keyof typeof SampleRate]
 
+export const SendingSampleRate = {
+    "48000": 48000,
+    "24000": 24000
+} as const
+export type SendingSampleRate = typeof SendingSampleRate[keyof typeof SendingSampleRate]
+
+export const InputSampleRate = {
+    "48000": 48000,
+    "24000": 24000
+} as const
+export type InputSampleRate = typeof InputSampleRate[keyof typeof InputSampleRate]
+
 export const BufferSize = {
     "256": 256,
     "512": 512,
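SendingSampleRate and InputSampleRate follow the file's existing const-object idiom: the object supplies runtime values (what the new dropdown iterates with Object.values), while the same-named type is the literal union 48000 | 24000 derived from it. A small usage sketch (the helper below is hypothetical, not part of the diff):

    // Narrow a raw number, e.g. a parsed <select> value, to the union type.
    const asSendingSampleRate = (n: number): SendingSampleRate | null =>
        (Object.values(SendingSampleRate) as number[]).includes(n) ? (n as SendingSampleRate) : null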
@@ -169,6 +184,7 @@ export const ServerSettingKey = {
     "f0Detector": "f0Detector",
     "recordIO": "recordIO",
     "serverMicProps": "serverMicProps",
+    "inputSampleRate": "inputSampleRate",
 } as const
 export type ServerSettingKey = typeof ServerSettingKey[keyof typeof ServerSettingKey]
 
@@ -188,8 +204,8 @@ export const DefaultVoiceChangerServerSetting: VoiceChangerServerSetting = {
     onnxExecutionProvider: "CPUExecutionProvider",
     f0Detector: "dio",
     recordIO: 0,
-    serverMicProps: ""
+    serverMicProps: "",
+    inputSampleRate: 48000
 }
 
 export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
|
||||
@ -197,6 +213,7 @@ export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
|
||||
mmvcServerUrl: "",
|
||||
protocol: "sio",
|
||||
sampleRate: 48000,
|
||||
sendingSampleRate: 48000,
|
||||
bufferSize: 1024,
|
||||
inputChunkNum: 48,
|
||||
speakers: [
|
||||
|
@@ -1,6 +1,6 @@
 import { useState, useMemo, useRef, useEffect } from "react"
 
-import { VoiceChangerClientSetting, Protocol, BufferSize, VoiceChangerMode, SampleRate, Speaker, DefaultVoiceChangerClientSetting, INDEXEDDB_KEY_CLIENT, Correspondence, DownSamplingMode } from "../const"
+import { VoiceChangerClientSetting, Protocol, BufferSize, VoiceChangerMode, SampleRate, Speaker, DefaultVoiceChangerClientSetting, INDEXEDDB_KEY_CLIENT, Correspondence, DownSamplingMode, SendingSampleRate } from "../const"
 import { VoiceChangerClient } from "../VoiceChangerClient"
 import { useIndexedDB } from "./useIndexedDB"
 
@@ -22,6 +22,7 @@ export type ClientSettingState = {
     setInputChunkNum: (num: number) => void;
     setVoiceChangerMode: (mode: VoiceChangerMode) => void
     setDownSamplingMode: (mode: DownSamplingMode) => void
+    setSendingSampleRate: (val: SendingSampleRate) => void
     setSampleRate: (num: SampleRate) => void
     setSpeakers: (speakers: Speaker[]) => void
     setCorrespondences: (file: File | null) => Promise<void>
@@ -191,6 +192,15 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
         }
     }, [props.voiceChangerClient])
 
+    const setSendingSampleRate = useMemo(() => {
+        return (val: SendingSampleRate) => {
+            if (!props.voiceChangerClient) return
+            props.voiceChangerClient.setSendingSampleRate(val)
+            settingRef.current.sendingSampleRate = val
+            setSetting({ ...settingRef.current })
+        }
+    }, [props.voiceChangerClient])
+
 
 
     const setSampleRate = useMemo(() => {
@@ -292,6 +302,7 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
         setInputChunkNum,
         setVoiceChangerMode,
         setDownSamplingMode,
+        setSendingSampleRate,
         setSampleRate,
         setSpeakers,
         setCorrespondences,
@@ -1,5 +1,5 @@
 import { useState, useMemo, useRef, useEffect } from "react"
-import { VoiceChangerServerSetting, ServerInfo, Framework, OnnxExecutionProvider, DefaultVoiceChangerServerSetting, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ServerAudioDevices } from "../const"
+import { VoiceChangerServerSetting, ServerInfo, Framework, OnnxExecutionProvider, DefaultVoiceChangerServerSetting, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ServerAudioDevices, InputSampleRate } from "../const"
 import { VoiceChangerClient } from "../VoiceChangerClient"
 import { useIndexedDB } from "./useIndexedDB"
 
@@ -52,6 +52,7 @@ export type ServerSettingState = {
     setF0Detector: (val: string) => Promise<boolean>;
     setRecordIO: (num: number) => Promise<boolean>;
     setServerMicrophone: (index: number) => Promise<boolean | undefined>
+    setInputSampleRate: (num: InputSampleRate) => Promise<boolean>
     reloadServerInfo: () => Promise<void>;
     setFileUploadSetting: (val: FileUploadSetting) => void
     loadModel: () => Promise<void>
@@ -233,6 +234,12 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
             return await _set_and_store(ServerSettingKey.serverMicProps, JSON.stringify(serverMicProps))
         }
     }, [props.voiceChangerClient])
+
+    const setInputSampleRate = useMemo(() => {
+        return async (num: number) => {
+            return await _set_and_store(ServerSettingKey.inputSampleRate, "" + num)
+        }
+    }, [props.voiceChangerClient])
     //////////////
     // Operations
     /////////////
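Note that _set_and_store sends every value to the server as a string ("" + num); on the Python side, VocieChangerSettings lists inputSampleRate in intData, which is presumably what restores it to an integer when the update is applied.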
@@ -413,6 +420,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
         setF0Detector,
         setRecordIO,
         setServerMicrophone,
+        setInputSampleRate,
         reloadServerInfo,
         setFileUploadSetting,
         loadModel,
@@ -4,6 +4,8 @@ module.exports = {
     entry: "./src/index.ts",
     resolve: {
         extensions: [".ts", ".js"],
+        fallback: {
+        }
     },
     module: {
         rules: [
@@ -30,7 +30,6 @@ class MMVC_Namespace(socketio.AsyncNamespace):
         else:
             unpackedData = np.array(struct.unpack('<%sh' % (len(data) // struct.calcsize('<h')), data))
             audio1 = self.voiceChangerManager.changeVoice(unpackedData)
-            # print("sio result:", len(audio1), audio1.shape)
             bin = struct.pack('<%sh' % len(audio1), *audio1)
             await self.emit('response', [timestamp, bin], to=sid)
 
@@ -4,6 +4,7 @@ import os
 import traceback
 import numpy as np
 from dataclasses import dataclass, asdict
+import resampy
 
 import onnxruntime
 
@@ -98,13 +99,14 @@ class VocieChangerSettings():
     f0Detector: str = "dio"  # dio or harvest
     recordIO: int = 1  # 0:off, 1:on
     serverMicProps: str = ""
+    inputSampleRate: int = 48000  # 48000 or 24000
 
     pyTorchModelFile: str = ""
     onnxModelFile: str = ""
     configFile: str = ""
 
     # List only the mutable fields below
-    intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize", "recordIO"]
+    intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize", "recordIO", "inputSampleRate"]
     floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapRate", "f0Factor"]
     strData = ["framework", "f0Detector", "serverMicProps"]
 
@@ -512,6 +514,9 @@ class VoiceChanger():
         return result
 
     def on_request(self, unpackedData: any):
+        if self.settings.inputSampleRate != 24000:
+            print("convert sampling rate!", self.settings.inputSampleRate)
+            unpackedData = resampy.resample(unpackedData, 48000, 24000)
         convertSize = self.settings.convertChunkNum * 128  # 128sample/1chunk
         # print("convsize:", unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate))
         if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize:
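This is the server-side half of the feature: whenever the client reports an inputSampleRate other than 24000, the incoming 48 kHz signal is resampled down to the 24 kHz the model works at, with resampy doing band-limited resampling. Purely to illustrate the rate conversion (not resampy's filter-based algorithm), a naive linear-interpolation resampler in the same TypeScript style as the client sketches above:

    const linearResample = (input: Float32Array, srcRate: number, dstRate: number): Float32Array => {
        const outLength = Math.round(input.length * dstRate / srcRate)
        const out = new Float32Array(outLength)
        for (let i = 0; i < outLength; i++) {
            const pos = i * srcRate / dstRate            // fractional position in the input
            const i0 = Math.floor(pos)
            const i1 = Math.min(i0 + 1, input.length - 1)
            const frac = pos - i0
            out[i] = input[i0] * (1 - frac) + input[i1] * frac
        }
        return out
    }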
@@ -547,123 +552,3 @@ class VoiceChanger():
         self.stream_in.write(unpackedData.astype(np.int16).tobytes())
         self.stream_out.write(result.tobytes())
         return result
-
-
-    #########################################################################################
-
-
-    def overlap_merge(self, now_wav, prev_wav, overlap_length):
-        """
-        Merges the wav data generated this time into the wav data generated last time, cross-fading over overlap_length samples.
-        The trailing overlap_length samples are dropped, since they will be merged and played on the next call.
-
-        Parameters
-        ----------
-        now_wav: the wav audio data generated this time
-        prev_wav: the wav audio data generated last time
-        overlap_length: length of the overlap
-        """
-        if overlap_length == 0:
-            return now_wav
-        gradation = np.arange(overlap_length) / overlap_length
-        now = np.frombuffer(now_wav, dtype='int16')
-        prev = np.frombuffer(prev_wav, dtype='int16')
-        now_head = now[:overlap_length]
-        prev_tail = prev[-overlap_length:]
-        print("merge params:", gradation.shape, now.shape, prev.shape, now_head.shape, prev_tail.shape)
-        merged = prev_tail * (np.cos(gradation * np.pi * 0.5) ** 2) + now_head * (np.cos((1 - gradation) * np.pi * 0.5) ** 2)
-        # merged = prev_tail * (1 - gradation) + now_head * gradation
-        overlapped = np.append(merged, now[overlap_length:-overlap_length])
-        signal = np.round(overlapped, decimals=0)
-        signal = signal.astype(np.int16)
-        # signal = signal.astype(np.int16).tobytes()
-        return signal
-
-    def on_request_(self, unpackedData: any):
-
-        self._generate_strength(unpackedData)
-
-        convertSize = 8192
-        unpackedData = unpackedData.astype(np.int16)
-        if hasattr(self, 'stored_raw_input') == False:
-            self.stored_raw_input = unpackedData
-        else:
-            self.stored_raw_input = np.concatenate([self.stored_raw_input, unpackedData])
-
-        self.stored_raw_input = self.stored_raw_input[-1 * (convertSize):]
-        processing_input = self.stored_raw_input
-
-        print("signal_shape1", unpackedData.shape, processing_input.shape, processing_input.dtype)
-        processing_input = processing_input / self.hps.data.max_wav_value
-        print("type:", processing_input.dtype)
-        _f0, _time = pw.dio(processing_input, self.hps.data.sampling_rate, frame_period=5.5)
-        f0 = pw.stonemask(processing_input, _f0, _time, self.hps.data.sampling_rate)
-        f0 = convert_continuos_f0(f0, int(processing_input.shape[0] / self.hps.data.hop_length))
-        f0 = torch.from_numpy(f0.astype(np.float32))
-
-        print("signal_shape2", f0.shape)
-
-        processing_input = torch.from_numpy(processing_input.astype(np.float32)).clone()
-        with torch.no_grad():
-            trans_length = processing_input.size()[0]
-            # spec, sid = get_audio_text_speaker_pair(signal.view(1, trans_length), Hyperparameters.SOURCE_ID)
-            processing_input_v = processing_input.view(1, trans_length)  # same as unsqueeze
-
-            print("processing_input_v shape:", processing_input_v.shape)
-            spec = spectrogram_torch(processing_input_v, self.hps.data.filter_length,
-                                     self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length,
-                                     center=False)
-            spec = torch.squeeze(spec, 0)
-            sid = torch.LongTensor([int(self.settings.srcId)])
-            dispose_stft_specs = 2
-            spec = spec[:, dispose_stft_specs:-dispose_stft_specs]
-            f0 = f0[dispose_stft_specs:-dispose_stft_specs]
-            print("spec shape:", spec.shape)
-            data = TextAudioSpeakerCollate(
-                sample_rate=self.hps.data.sampling_rate,
-                hop_size=self.hps.data.hop_length,
-                f0_factor=self.settings.f0Factor
-            )([(spec, sid, f0)])
-
-            if self.settings.gpu >= 0 or self.gpu_num > 0:
-                # spec, spec_lengths, sid_src, sin, d = [x.cuda(Hyperparameters.GPU_ID) for x in data]
-                spec, spec_lengths, sid_src, sin, d = data
-                spec = spec.cuda(self.settings.gpu)
-                spec_lengths = spec_lengths.cuda(self.settings.gpu)
-                sid_src = sid_src.cuda(self.settings.gpu)
-                sin = sin.cuda(self.settings.gpu)
-                d = tuple([d[:1].cuda(self.settings.gpu) for d in d])
-                sid_target = torch.LongTensor([self.settings.dstId]).cuda(self.settings.gpu)
-                audio = self.net_g.cuda(self.settings.gpu).voice_conversion(spec, spec_lengths,
-                                                                            sin, d, sid_src, sid_target)[0, 0].data.cpu().float().numpy()
-            else:
-                spec, spec_lengths, sid_src, sin, d = data
-                sid_target = torch.LongTensor([self.settings.dstId])
-                audio = self.net_g.voice_conversion(spec, spec_lengths, sin, d, sid_src, sid_target)[0, 0].data.cpu().float().numpy()
-
-            dispose_conv1d_length = 1280
-            audio = audio[dispose_conv1d_length:-dispose_conv1d_length]
-        audio = audio * self.hps.data.max_wav_value
-        audio = audio.astype(np.int16)
-        print("fin audio shape:", audio.shape)
-        audio = audio.tobytes()
-
-        if hasattr(self, "prev_audio"):
-            try:
-                audio1 = self.overlap_merge(audio, self.prev_audio, 1024)
-            except:
-                audio1 = np.zeros(1).astype(np.int16)
-                pass
-                # return np.zeros(1).astype(np.int16)
-        else:
-            audio1 = np.zeros(1).astype(np.int16)
-
-        self.prev_audio = audio
-        self.out.write(audio)
-        self.stream_in.write(unpackedData.tobytes())
-        # print(audio1)
-
-        return audio1
-
-    def __del__(self):
-        print("DESTRUCTOR")
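A closing note on the deleted overlap_merge: it used a raised-cosine (Hann-shaped) crossfade. Since cos((1 - g) * pi / 2) = sin(g * pi / 2), the two weights are cos^2(theta) and sin^2(theta) with theta = g * pi / 2, so they sum to exactly 1 at every sample of the overlap; unlike the commented-out linear blend, both fades also start and end with zero slope, avoiding kinks at the seam.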