-
Notifications
You must be signed in to change notification settings - Fork 9
/
rnaseq_clipper_fasta.pl
executable file
·50 lines (45 loc) · 1004 Bytes
/
rnaseq_clipper_fasta.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/perl
$usage= "
rnaseq_clipper: selects fasta records the the specific 5'-leading sequence, trims the
leader off
arguments:
1 : fasta file name
2 : string to define the leading sequence, default \'[NATGC][ATGC][AC][AT]GGG+\'
(Illumina RNA-seq leader)
3 : (optional) keep=1|0 whether to keep reads without the leader sequence (default 0)
";
my $fq=shift or die $usage;
my $keep=0;
my $lead="[NATGC][ATGC][AC][AT]GGG+";
if ($ARGV[0]) { $lead=$ARGV[0];}
if ($ARGV[1] eq "keep=1") { $keep=1;}
my $trim=0;
my $name="";
my $name2="";
my $seq="";
my $qua="";
open INP, $fq or die "cannot open file $fq\n";
while (<INP>) {
if ($_=~/^>(.+)$/) {
$name2=$1;
if ($seq=~/^($lead)(.+)/) {
$trim=length($1);
print ">$name\n$2\n";
}
elsif ($keep) { print "$name\n$seq\n"; }
$seq="";
$ll=0;
@sites=();
$name=$name2;
}
else{
chomp;
$seq.=$_;
}
}
$name2=$1;
if ($seq=~/^($lead)(.+)/) {
$trim=length($1);
print ">$name\n$2\n";
}
elsif ($keep) { print "$name\n$seq\n"; }