quality control update

2025-01-23 13:35:12 +03:00 · 2023-02-15 07:18:05 +09:00 · 2023-02-15 07:18:05 +09:00 · 47489571da
commit 47489571da
parent 25a673b66f
4 changed files with 162 additions and 20 deletions
--- a/client/demo/dist/index.js
+++ b/client/demo/dist/index.js
--- a/client/demo/src/107_qulity_control.tsx
+++ b/client/demo/src/107_qulity_control.tsx
@ -1,5 +1,5 @@
 import { BufferSize, DownSamplingMode, F0Detector, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
-import React, { useMemo, useState } from "react"
+import React, { useEffect, useMemo, useState } from "react"
 import { ClientState } from "@dannadori/voice-changer-client-js";


@ -10,9 +10,31 @@ export type UseQualityControlProps = {
 export type QualityControlState = {
    qualityControl: JSX.Element;
 }
+/**
+ * Lists the audio output devices reported by the browser.
+ *
+ * First opens and immediately stops an audio-only stream via `getUserMedia`
+ * (presumably to trigger the permission prompt so that device labels are
+ * populated -- confirm), then filters `enumerateDevices()` down to entries
+ * whose kind is "audiooutput".
+ *
+ * @returns the audio output devices, or an empty array when the media-devices
+ *          API is unavailable or enumeration fails.
+ */
+const reloadDevices = async (): Promise<MediaDeviceInfo[]> => {
+    try {
+        const ms = await navigator.mediaDevices.getUserMedia({ video: false, audio: true });
+        // The stream itself is not needed; release the tracks right away.
+        ms.getTracks().forEach(x => { x.stop() })
+    } catch (e) {
+        console.warn("Enumerate device error::", e)
+    }
+    try {
+        const mediaDeviceInfos = await navigator.mediaDevices.enumerateDevices();
+        return mediaDeviceInfos.filter(x => x.kind === "audiooutput")
+    } catch (e) {
+        // e.g. `navigator.mediaDevices` is undefined outside a secure context.
+        // Callers invoke this without a catch, so fail soft with an empty list.
+        console.warn("Enumerate device error::", e)
+        return []
+    }
+}
+

 export const useQualityControl = (props: UseQualityControlProps): QualityControlState => {
    const [showQualityControl, setShowQualityControl] = useState<boolean>(false)
+    const [outputAudioDeviceInfo, setOutputAudioDeviceInfo] = useState<MediaDeviceInfo[]>([])
+    const [audioOutputForGUI, setAudioOutputForGUI] = useState<string>("none")
+    useEffect(() => {
+        const initialize = async () => {
+            const audioInfo = await reloadDevices()
+            setOutputAudioDeviceInfo(audioInfo)
+        }
+        initialize()
+    }, [])


    const noiseControlRow = useMemo(() => {
@ -94,12 +116,41 @@ export const useQualityControl = (props: UseQualityControlProps): QualityControl


    const recordIORow = useMemo(() => {
+        /**
+         * Sends the new recordIO value to the server. When recording is switched
+         * off (val is 0), rebuilds the two analysis containers: the spectrogram
+         * images (dio / harvest) and the input / output wav players, routing the
+         * players to the currently selected output device when possible.
+         */
+        const setReocrdIO = async (val: number) => {
+            await props.clientState.serverSetting.setRecordIO(val)
+            if (val !== 0) {
+                return
+            }
+            // Timestamp query string so the browser does not reuse a cached copy
+            // of files that share the same path across recordings.
+            const cacheBuster = "?" + new Date().getTime()
+
+            const imageContainer = document.getElementById("quality-control-analyze-image-container")
+            if (imageContainer) {
+                imageContainer.innerHTML = ""
+                for (const path of ["/tmp/analyze-dio.png", "/tmp/analyze-harvest.png"]) {
+                    const image = document.createElement("img")
+                    image.src = path + cacheBuster
+                    imageContainer.appendChild(image)
+                }
+            }
+
+            const wavContainer = document.getElementById("quality-control-analyze-wav-container")
+            if (wavContainer) {
+                wavContainer.innerHTML = ""
+                for (const path of ["/tmp/in.wav", "/tmp/out.wav"]) {
+                    const media = document.createElement("audio")
+                    media.src = path + cacheBuster
+                    media.controls = true
+                    // setSinkId is not implemented by every browser and rejects for
+                    // device ids it does not know (such as the initial "none").
+                    // Neither case may prevent the player from being appended.
+                    const sinkable = media as HTMLAudioElement & { setSinkId?: (sinkId: string) => Promise<void> }
+                    if (typeof sinkable.setSinkId === "function") {
+                        sinkable.setSinkId(audioOutputForGUI).catch((e: unknown) => {
+                            console.warn("setSinkId failed::", e)
+                        })
+                    }
+                    wavContainer.appendChild(media)
+                }
+            }
+        }
        return (
+            <>
                <div className="body-row split-3-7 left-padding-1 guided">
                    <div className="body-item-title left-padding-1 ">recordIO</div>
                    <div className="body-select-container">
                        <select className="body-select" value={props.clientState.serverSetting.setting.recordIO} onChange={(e) => {
-                        props.clientState.serverSetting.setRecordIO(Number(e.target.value))
+                            setReocrdIO(Number(e.target.value))
                        }}>
                            {
                                Object.values([0, 1]).map(x => {
@ -109,8 +160,46 @@ export const useQualityControl = (props: UseQualityControlProps): QualityControl
                        </select>
                    </div>
                </div>
+                <div className="body-row split-3-7 left-padding-1 guided">
+                    <div className="body-item-title left-padding-1 ">
+                        <div>
+                            Spectrogram
+                        </div>
+                        <div>
+                            <span>(left: dio, right:harvest)</span>
+                        </div>
+                    </div>
+                    <div className="body-image-container-quality-analyze" id="quality-control-analyze-image-container">
+                    </div>
+                </div>
+                <div className="body-row split-3-7 left-padding-1 guided">
+                    <div className="body-item-title left-padding-1 ">
+                        <div>
+                            wav (left:input, right:output)
+                        </div>
+                        <select className="body-select" value={audioOutputForGUI} onChange={(e) => {
+                            setAudioOutputForGUI(e.target.value)
+                            const wavContainer = document.getElementById("quality-control-analyze-wav-container") as HTMLDivElement
+                            wavContainer.childNodes.forEach(x => {
+                                if (x instanceof HTMLAudioElement) {
+                                    //@ts-ignore
+                                    x.setSinkId(e.target.value)
+                                }
+                            })
+                        }}>
+                            {
+                                outputAudioDeviceInfo.map(x => {
+                                    return <option key={x.deviceId} value={x.deviceId}>{x.label}</option>
+                                })
+                            }
+                        </select>
+                    </div>
+                    <div className="body-wav-container-quality-analyze" id="quality-control-analyze-wav-container">
+                    </div>
+                </div>
+            </>
        )
-    }, [props.clientState.serverSetting.setting.recordIO, props.clientState.serverSetting.setRecordIO])
+    }, [props.clientState.serverSetting.setting.recordIO, props.clientState.serverSetting.setRecordIO, outputAudioDeviceInfo, audioOutputForGUI])

    const QualityControlContent = useMemo(() => {
        if (!showQualityControl) return <></>
--- a/client/demo/src/css/App.css
+++ b/client/demo/src/css/App.css
@ -444,8 +444,16 @@ body {
        }
    }
 }
-.body-select-container {
 .body-select {
    color: rgb(30, 30, 30);
+    max-width: 100%;
+}
+
+.body-image-container-quality-analyze,
+.body-wav-container-quality-analyze {
+    display: flex;
+    width: 100%;
+    & > img {
+        width: 50%;
    }
 }
--- a/server/voice_changer/VoiceChanger.py
+++ b/server/voice_changer/VoiceChanger.py
@ -22,6 +22,13 @@ providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecution

 import wave

+import matplotlib
+matplotlib.use('Agg')
+import pylab
+import librosa
+import librosa.display
+SAMPLING_RATE = 24000
+

 class MockStream:
    """
@ -194,6 +201,18 @@ class VoiceChanger():

        return data

+    def _get_f0_dio(self, y, sr=SAMPLING_RATE):
+        """Estimate an f0 contour for ``y`` using ``pw.dio`` refined by ``pw.stonemask``.
+
+        Returns ``(f0, time)``, where ``time`` is an evenly spaced axis running
+        from 0 to ``y.shape[0] / sr`` with one entry per frame position
+        returned by ``pw.dio``.
+        """
+        raw_f0, frame_pos = pw.dio(y, sr, frame_period=5)
+        refined_f0 = pw.stonemask(y, raw_f0, frame_pos, sr)
+        duration = y.shape[0] / sr
+        time_axis = np.linspace(0, duration, len(frame_pos))
+        return refined_f0, time_axis
+
+    def _get_f0_harvest(self, y, sr=SAMPLING_RATE):
+        """Estimate an f0 contour for ``y`` using ``pw.harvest`` refined by ``pw.stonemask``.
+
+        Returns ``(f0, time)``, where ``time`` is an evenly spaced axis running
+        from 0 to ``y.shape[0] / sr`` with one entry per frame position
+        returned by ``pw.harvest``.
+        """
+        raw_f0, frame_pos = pw.harvest(y, sr, frame_period=5)
+        refined_f0 = pw.stonemask(y, raw_f0, frame_pos, sr)
+        duration = y.shape[0] / sr
+        time_axis = np.linspace(0, duration, len(frame_pos))
+        return refined_f0, time_axis
+
    def update_setteings(self, key: str, val: any):
        if key == "onnxExecutionProvider" and self.onnx_session != None:
            if val == "CUDAExecutionProvider":
@ -215,6 +234,32 @@ class VoiceChanger():
                self.unpackedData_length = 0
            if key == "recordIO" and val == 1:
                self._setupRecordIO()
+            if key == "recordIO" and val == 0:
+                try:
+                    stream_input_file = os.path.join(TMP_DIR, "in.wav")
+                    analyze_file_dio = os.path.join(TMP_DIR, "analyze-dio.png")
+                    analyze_file_harvest = os.path.join(TMP_DIR, "analyze-harvest.png")
+                    y, sr = librosa.load(stream_input_file, SAMPLING_RATE)
+                    y = y.astype(np.float64)
+                    spec = librosa.amplitude_to_db(np.abs(librosa.stft(y, n_fft=2048, win_length=2048, hop_length=128)), ref=np.max)
+                    f0_dio, times = self._get_f0_dio(y)
+                    f0_harvest, times = self._get_f0_harvest(y)
+
+                    pylab.close()
+                    HOP_LENGTH = 128
+                    img = librosa.display.specshow(spec, sr=SAMPLING_RATE, hop_length=HOP_LENGTH, x_axis='time', y_axis='log', )
+                    pylab.plot(times, f0_dio, label='f0', color=(0, 1, 1, 0.6), linewidth=3)
+                    pylab.savefig(analyze_file_dio)
+
+                    pylab.close()
+                    HOP_LENGTH = 128
+                    img = librosa.display.specshow(spec, sr=SAMPLING_RATE, hop_length=HOP_LENGTH, x_axis='time', y_axis='log', )
+                    pylab.plot(times, f0_harvest, label='f0', color=(0, 1, 1, 0.6), linewidth=3)
+                    pylab.savefig(analyze_file_harvest)
+
+                except Exception as e:
+                    print("recordIO exception", e)
+
        elif key in self.settings.floatData:
            setattr(self.settings, key, float(val))
        elif key in self.settings.strData: