Skip to content

Commit

Permalink
FASTA & BED to FASTA file converter
Browse files Browse the repository at this point in the history
This program converts FASTA genome sequences and BED queries into a single FASTA file for easy use and access. Output file is both printed and stored. 

Notable features:
- Conversion of sequence to RNA
- Parser style user input
  • Loading branch information
SohanChoudhury authored Aug 17, 2016
1 parent 00fd126 commit 7456741
Showing 1 changed file with 136 additions and 0 deletions.
136 changes: 136 additions & 0 deletions FASTA_converter_1.13.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# File: FASTA_converter_1.13.py


## This function, true to it's name, performs the main processes of the program. It reads the BED input file, while writing the output FASTA file.

def Main_process(BEDin_in, FASTAin_in, RnaDNA_in, OUTPUTin):
BEDin = BEDin_in
FASTAin = FASTAin_in
OUTPUTraw3 = RnaDNA_in


BED = open(BEDin, "r")
OUTPUT = open(OUTPUTin, "w")

line = BED.readline()

words = line.split()


if words[0][0] == '#':
line = BED.readline()
else:
A = 0


previous_word = 'NA'
Match = 0
Match1st = 0
SignMinus = "-"

while line:
words = line.split()

print( ">" + words[0] + " - " + words[1] + " - " + words[2] + " - " + words[3])
OUTPUT.write( ">" + words[0] + " - " + words[1] + " - " + words[2] + " - " + words[3] + '\n')
current_word = words[0]
start = int(words[1])
end = int(words[2])
more_info = words[3]
signout = words[4]


if previous_word == current_word:
output_print( line2[start:end], signout, SignMinus, OUTPUTraw3,OUTPUT)

else:
FASTA = open(FASTAin, "r")
line2 = FASTA.readline()

while line2:
if words[0] in line2 :
line2 = FASTA.readline()
words2 = line2.split()

output_print( line2[start:end], signout, SignMinus,OUTPUTraw3,OUTPUT)

FASTA.close()
Match = 1
break
else:
line2 = FASTA.readline()
words2 = line2.split()



previous_word = current_word
line = BED.readline()

print(' ')


print("Output saved.")
print(OUTPUTin)
print(' ')

print( "© Sohan Choudhury")
print( "Department of Hematology | Yale University")

BED.close()
OUTPUT.close()

print(' ')



## This function does two things. If the BED input line has a "-", it will find the complementary nucleotides of the corresponding sequence, then reverse it.
## If the user asks for the output FASTA file to be RNA, the function will replace all the T's in the corresponding sequence with U's.

def output_print(string_in, signout_in, SignMinus_in, OUTPUTraw3_in, OUTPUT):
if signout_in == SignMinus_in:
dict = str.maketrans("ATGC", "TACG")
value = string_in
result = value.translate(dict)
if OUTPUTraw3_in == "y":
print( result[::-1].replace("T","U"))
OUTPUT.write(result[::-1].replace("T","U") + '\n')
else:
print( result[::-1])
OUTPUT.write(result[::-1] + '\n')
else:
if OUTPUTraw3_in == "y":
print( string_in.replace("T","U"))
OUTPUT.write( string_in.replace("T","U") + '\n')
else:
print( string_in)
OUTPUT.write(string_in + '\n')



## Below is the user interface, created with argparse. The first two arguments, the input BED and FASTA files, are required. The rest are optional.

import os, argparse

parser = argparse.ArgumentParser(description='This is a bioinformatics file format converter that utilizes Python 3. Input your BED file as well as a FASTA sequence to obtain the respective genome sequence in the FASTA format. Order does not matter. Only first two fields are required. Input format: programlocation -b BEDfilelocation -f Sequencelocation -r y/n -o Output')
parser.add_argument("-b", required = True, type=argparse.FileType('r'), help="BED file *required*")
parser.add_argument("-g", required = True, type=argparse.FileType('r'), help="Genome sequence file *required*")
parser.add_argument("-r", type=str, help="RNA for output FASTA? (y/n)" , choices=['y', 'n'], nargs = '?', default = 'n')
parser.add_argument("-o", type=argparse.FileType('w'), help="Name of output file", nargs = '?')

parser.add_argument("-v", "--verbose", action="store_true",
help="increase output verbosity")
args = parser.parse_args()

if args.verbose:
print ("Arguments: ".format(args.g, args.b, args.r, args.o))

else:

if args.o:
OutputFileName = args.o.name
else:

OutputFileName = args.b.name.replace("bed", "fa")


Main_process( args.b.name, args.g.name, args.r, OutputFileName)

0 comments on commit 7456741

Please sign in to comment.