-
Notifications
You must be signed in to change notification settings - Fork 1
/
create_lists.py
68 lines (57 loc) · 1.73 KB
/
create_lists.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import glob
import numpy as np
# Seed NumPy's global RNG so the np.random.shuffle calls in this script
# produce the same result on every run.
np.random.seed(0)

# find all mfc files
DIR = 'LibriSpeech'
# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# the seeded shuffles start from the same ordering on every machine.
files = sorted(glob.glob(DIR + '/**/*.mfc', recursive=True))


def group_by_speaker(filepaths):
    """Group feature-file paths by speaker ID.

    The speaker ID is taken to be the third-from-last path component, i.e.
    paths are expected to look like ``.../<speaker>/<subdir>/<file>``.

    Args:
        filepaths: Iterable of path strings, using either ``/`` or the
            platform's native separator.

    Returns:
        Dict mapping speaker ID to the list of that speaker's paths, in the
        order they were given. The paths themselves are stored unmodified.

    Raises:
        IndexError: If a path has fewer than three components.
    """
    grouped = {}
    for filepath in filepaths:
        # Normalise before splitting so native separators (e.g. backslashes
        # on Windows) are handled the same way as '/'.
        spkr = os.path.normpath(filepath).split(os.sep)[-3]
        grouped.setdefault(spkr, []).append(filepath)
    return grouped


# collect by user
speakers = group_by_speaker(files)
# choose N users for test / train
# Start from the speaker IDs in sorted order, then shuffle them in place with
# NumPy's global RNG; the first N entries of the shuffled array are the
# train/test speakers, where N is one tenth of all speakers (rounded down).
speaker_list = np.asarray(sorted(speakers))
np.random.shuffle(speaker_list)
N = len(speaker_list) // 10
# select M files for train
M = 5
train_files = {}
test_files = {}
for speaker_id in speaker_list[:N]:
    # these are train/test speakers
    utterances = speakers[speaker_id]
    # In-place shuffle: this also reorders the list stored in `speakers`.
    np.random.shuffle(utterances)
    # First M shuffled files go to train, whatever is left goes to test.
    train_files[speaker_id], test_files[speaker_id] = (
        utterances[:M],
        utterances[M:],
    )
# these are UBM speakers: every entry of the shuffled speaker list after the
# first N. Each one keeps its complete file list (no train/test split).
ubm_files = {
    ubm_speaker: speakers[ubm_speaker] for ubm_speaker in speaker_list[N:]
}
# Prefix prepended to every path written to the list files.
PRE_PATH = '../'

# Collect the lines for both UBM lists and join once at the end, instead of
# growing a string with += for every file.
#   ubm.lst     -> "<path>"
#   ubm_ind.lst -> "<speaker> <path>"
ubm_lines = []
ubm_ind_lines = []
for spkr in speaker_list[N:]:
    for filepath in ubm_files[spkr]:
        ubm_lines.append(PRE_PATH + filepath + '\n')
        ubm_ind_lines.append(spkr + ' ' + PRE_PATH + filepath + '\n')
ubm_to_write = ''.join(ubm_lines)
ubm_ind_to_write = ''.join(ubm_ind_lines)

with open('ubm.lst', 'w') as f:
    f.write(ubm_to_write)
with open('ubm_ind.lst', 'w') as f:
    f.write(ubm_ind_to_write)
# train.lst: one "<speaker> <path>" line per training file, speakers in
# shuffled-list order.
with open('train.lst', 'w') as train_list:
    for speaker_id in speaker_list[:N]:
        line_prefix = speaker_id + ' ' + PRE_PATH
        train_list.writelines(
            line_prefix + path + '\n' for path in train_files[speaker_id]
        )
# test.lst: one trial per line, "<claimed speaker> <path> <label>".
# Each train/test speaker is paired with the held-out files of every
# train/test speaker (itself included); the trial is labelled 'target' when
# the file belongs to the claimed speaker and 'imposter' otherwise.
with open('test.lst', 'w') as f:
    for spkr in speaker_list[:N]:
        for trial in speaker_list[:N]:
            # Take the files from `trial`, not `spkr`: pairing spkr with its
            # own files on every pass would write each (speaker, file) line
            # N times, once as 'target' and N - 1 times as 'imposter'.
            files = test_files[trial]
            label = 'target' if spkr == trial else 'imposter'
            for filepath in files:
                f.write(spkr + ' ' + PRE_PATH + filepath + ' ' + label + '\n')