-
Notifications
You must be signed in to change notification settings - Fork 0
/
gfa_fasta.py
19 lines (19 loc) · 865 Bytes
/
gfa_fasta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Universitat Potsdam
# Author Gaurav Sablok
# date: 2024-2-29
def gfafastawrite(gfafile, filewrite):
    """
    Write the segment sequences of a GFA file to a FASTA file.

    The GFA file is read line by line as tab-separated text. Every
    segment record (first field "S") that has a name and a sequence is
    written as one FASTA entry: a ">" header line holding the segment
    name, followed by one line holding the sequence. All other records
    (header, link, path, ...) are skipped, as are blank or truncated
    lines.

    Parameters:
        gfafile: path of the GFA file to read.
        filewrite: path of the FASTA file to write; it is created, or
            truncated if it already exists.

    Returns:
        None. The result is the file written at ``filewrite``.

    Raises:
        OSError: if either file cannot be opened.
    """
    # Both handles are managed by the same ``with`` so they are closed
    # even if an error occurs part-way through the conversion.
    with open(gfafile) as gfaread, open(filewrite, "w") as fastawrite:
        for line in gfaread:
            # Strip the line terminator first so it can never end up
            # glued to the last field of the record.
            fields = line.rstrip("\r\n").split("\t")

            # Select segments by record type instead of guessing from
            # the content of the third column; this also keeps the name
            # and the sequence paired, because both come from the same
            # line.
            if len(fields) < 3 or fields[0] != "S":
                continue

            name, sequence = fields[1], fields[2]

            # "*" is the GFA placeholder for a sequence that is not
            # stored in the file, so there is nothing to export.
            if sequence == "*":
                continue

            fastawrite.write(">" + name + "\n")
            fastawrite.write(sequence + "\n")