-
Notifications
You must be signed in to change notification settings - Fork 3
/
barcode_cellsuspension_mapping.py
42 lines (31 loc) · 1.13 KB
/
barcode_cellsuspension_mapping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import string, sys, os
import h5py
from sets import Set
sys.path.insert(0, os.path.dirname(__file__))
cell_suspension_file = "cell_suspension_0.json"
h5_file = "filtered_gene_bc_matrices_h5.h5"
h5Group = "GRCh38"
def output(barcodes, bundle_uuid, fout):
for barcode in barcodes:
fout.write(barcode + '\t' + bundle_uuid +'\n')
if len(sys.argv[:]) != 3:
print "python barcode_cellsuspension_mapping.py inputdir barcode_cellsuspension_mapping_output"
print
sys.exit()
inputdir = sys.argv[1]
mapping_output = sys.argv[2]
fout = open(mapping_output, 'w')
for subdir in os.listdir(inputdir):
dir = inputdir + "/" + subdir
if not os.path.isdir(dir):
continue
if not os.path.exists(dir + "/" + cell_suspension_file):
continue
# import cell_suspension_id
# cell_suspension_id = cell_suspension_id.cellSuspensionID (dir + "/" + cell_suspension_file)
bundle_uuid = subdir # HCA data has duplicate cell suspension id weird, so use subdir/bundle_uuid for now
hF = h5py.File(dir + "/" + h5_file)
barcodes = hF[h5Group + "/barcodes"]
barcodes = map(lambda x: bundle_uuid + '_' + x, barcodes)
output(barcodes, bundle_uuid, fout)
fout.close()