# Mirror of https://github.com/w-okada/voice-changer.git (synced 2025-02-03 08:43:57 +03:00).
# Original file: 87 lines, 3.2 KiB, Python.
|
import glob
import os
import sys
|
||
|
|
||
|
def read_lab(lab_f):
    """Convert a phoneme label file into a hyphen-separated phoneme string.

    Every non-blank line of ``lab_f`` is split on whitespace and its third
    field is taken as the phoneme. The sequence must start with ``silB`` and
    end with ``silE``; both markers are rewritten to ``sil`` and the phonemes
    are joined with ``-`` (e.g. ``"sil-a-i-sil"``).

    Args:
        lab_f: Path of the label file to read.

    Returns:
        The phonemes joined by ``-`` with the boundary markers normalised
        to ``sil``.

    Raises:
        ValueError: If a non-blank line has fewer than three fields, or the
            sequence is empty or not delimited by ``silB`` ... ``silE``.
    """
    with open(lab_f, 'r') as f:
        # splitlines() copes with a missing trailing newline, which the
        # previous split("\n") + range(len - 1) logic silently mishandled.
        lines = f.read().splitlines()

    phonemes = []
    for line_no, line in enumerate(lines, start=1):
        fields = line.split()
        if not fields:
            # Blank lines carry no label; skip instead of raising IndexError.
            continue
        if len(fields) < 3:
            raise ValueError(
                "Error! {}:{}: expected at least 3 fields, got {!r}".format(
                    lab_f, line_no, line
                )
            )
        phonemes.append(fields[2])

    if not phonemes or phonemes[0] != 'silB' or phonemes[-1] != 'silE':
        # The original printed "Error!" and evaluated a bare `exit` (a no-op),
        # so it returned None and the caller failed later with a TypeError.
        raise ValueError(
            "Error! {}: label sequence must start with 'silB' and end with "
            "'silE'".format(lab_f)
        )

    phonemes[0] = 'sil'
    phonemes[-1] = 'sil'
    return "-".join(phonemes)
|
||
|
|
||
|
def _append_split(line, index, train_list, val_list):
    """Echo ``line`` and route it to ``val_list`` for every 10th index, else ``train_list``."""
    print(line)
    target = val_list if index % 10 == 0 else train_list
    target.append(line + "\n")


def _write_filelist(path, lines):
    """Write ``lines`` to ``path`` as UTF-8 with LF line endings."""
    with open(path, 'w', encoding='utf-8', newline='\n') as f:
        f.writelines(lines)


def create_dataset(filename):
    """Build train/validation file lists from the ``dataset`` directory.

    Each entry of ``dataset/textful/*`` is treated as one speaker whose
    ``wav/*`` and ``text/*`` files are paired in sorted order; entries with
    no wav files are skipped without consuming a speaker id. Each entry of
    ``dataset/textless/*`` is treated as one speaker whose files are listed
    with the placeholder text ``a``. Within each speaker, items at index
    0, 10, 20, ... go to the validation list and the rest to the training
    list. Every generated line is also printed.

    Output is written under ``filelists/`` (created if missing) as
    ``<filename>_textful.txt``, ``<filename>_textful_val.txt``,
    ``<filename>_textless.txt``, ``<filename>_val_textless.txt`` and
    ``<filename>_Correspondence.txt`` (speaker id to directory mapping).

    Args:
        filename: Prefix used for the generated file-list names.

    Returns:
        The last speaker id assigned, i.e. the number of speakers minus one
        (``-1`` when no speaker was found).
    """
    textful_dirs = sorted(glob.glob("dataset/textful/*"))
    textless_dirs = sorted(glob.glob("dataset/textless/*"))

    correspondence = []
    train_textful, val_textful = [], []
    train_textless, val_textless = [], []
    speaker_id = 0

    for d in textful_dirs:
        wav_files = sorted(glob.glob(d + "/wav/*"))
        lab_files = sorted(glob.glob(d + "/text/*"))
        if not wav_files:
            continue
        if len(lab_files) != len(wav_files):
            # zip() below stops at the shorter list, so pairs may be dropped
            # or misaligned; surface that instead of staying silent.
            print(
                "Warning: " + d + " has " + str(len(wav_files)) + " wav files but "
                + str(len(lab_files)) + " label files; extra files are ignored",
                file=sys.stderr,
            )
        for index, (lab, wav) in enumerate(zip(lab_files, wav_files)):
            phonemes = read_lab(lab)
            _append_split(
                wav + "|" + str(speaker_id) + "|" + phonemes,
                index, train_textful, val_textful,
            )
        correspondence.append(str(speaker_id) + "|" + d + "\n")
        speaker_id += 1

    for d in textless_dirs:
        wav_files = sorted(glob.glob(d + "/*"))
        for index, wav in enumerate(wav_files):
            _append_split(
                wav + "|" + str(speaker_id) + "|a",
                index, train_textless, val_textless,
            )
        correspondence.append(str(speaker_id) + "|" + d + "\n")
        speaker_id += 1

    # Without this the function fails with FileNotFoundError only after all
    # the label parsing above has already been done.
    os.makedirs('filelists', exist_ok=True)

    outputs = (
        ('_textful.txt', train_textful),
        ('_textful_val.txt', val_textful),
        ('_textless.txt', train_textless),
        ('_val_textless.txt', val_textless),
        ('_Correspondence.txt', correspondence),
    )
    for suffix, lines in outputs:
        _write_filelist('filelists/' + filename + suffix, lines)

    return speaker_id - 1
|
||
|
|
||
|
def main(argv):
    """Command-line entry point: build the file lists for one dataset name.

    Args:
        argv: Argument vector in ``sys.argv`` layout, i.e. ``argv[1]`` is the
            file-list name prefix. Falls back to ``sys.argv`` when ``None``.

    Returns:
        A ``(filename, n_spk)`` tuple, where ``n_spk`` is the value returned
        by ``create_dataset``.

    Raises:
        SystemExit: With a usage message when the name argument is missing.
    """
    # The original ignored `argv` and always read sys.argv; honour the
    # parameter while keeping sys.argv as the fallback.
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        prog = args[0] if args else "create_dataset"
        sys.exit("usage: " + str(prog) + " <filename>")

    filename = str(args[1])
    print(filename)
    n_spk = create_dataset(filename)
    return filename, n_spk
|
||
|
|
||
|
if __name__ == '__main__':
    # main() returns a (filename, n_spk) tuple. Handing that to sys.exit()
    # makes Python print it to stderr and exit with status 1 even on success,
    # so report the result explicitly and exit with status 0 instead.
    print(main(sys.argv))
    sys.exit(0)
|