-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9026eaf
commit 1271782
Showing
1 changed file
with
203 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
#!/bin/bash

# Treat unset variables as errors and let a pipeline report the failure of any
# of its stages. errexit (-e) is not enabled.
set -uo pipefail

### Recording the time when the script was started

start=$(date +%s) # start time of script (seconds since the epoch)

# ANSI escape sequences, expanded later by `echo -e`.
RESTORE='\033[0m'
YELLOW='\033[01;33m'

# Start-up banner. The spacing inside the box has been re-aligned so that the
# left and right edges line up.
# NOTE(review): the contact address on the last banner line looks like it was
# redacted by the page this script was copied from - restore the real one.
echo -e \
" ________________________________________
|   __           _      _    ____   ___   |
|  / _| __ _ ___| |_   / \  |___ \ / _ \  |
| | |_ / _| / __| __| / _ \   __) | | | | |
| |  _| (_| \__ \ |_ / ___ \ / __/| |_| | |
| |_|  \__,_|___/\__/_/   \_\_____|\__\_\ |
|_________________________________________|
\n#\t${YELLOW}©Abhijeet Singh${RESTORE}
#\t${YELLOW} [email protected]${RESTORE}
"

### Setting up variables

# Filled in by the -i / -o command-line options.
input_file=""
output_file=""

### Username

# Prefer the invoking user when run through sudo, then $USER. The final
# `id -un` fallback keeps this line from aborting under `set -u` when USER is
# not exported.
user=${SUDO_USER:-${USER:-$(id -un 2> /dev/null)}}

### Defaults

version_def="0.1.0"

### Getting processors information

# Try the Linux, BSD/macOS and POSIX ways of counting CPUs in turn; fall back
# to 1 so THREADS is never left empty.
THREADS=$(nproc 2> /dev/null || sysctl -n hw.ncpu 2> /dev/null || getconf _NPROCESSORS_ONLN 2> /dev/null || echo 1)

### Defining flags

# function

# Print the command-line synopsis and an example invocation to stderr, then
# terminate the script with exit status 1.
usage() {
  echo "Usage : $0 [-i <input file>] [-o <output file>]
Example : fastA2Q -i /<input file>/ -o /<output file>/" 1>&2
  exit 1
}

# flags

# Parse the command-line options given as arguments.
#   -i FILE  stored in the global input_file
#   -o FILE  stored in the global output_file
#   -h       print the help text and exit
#   -v       print the version (global version_def) and exit
# Any other option, or -i/-o without a value, reports the problem on stderr and
# hands over to usage(). OPTIND is left pointing past the last parsed option so
# the caller can shift the options away.
parse_args() {
  local flag
  OPTIND=1
  # The leading ':' selects getopts' silent mode, which is what makes the ':'
  # (missing argument) and '?' (unknown option) arms below reachable.
  while getopts ":i:o:hv" flag; do
    case "${flag}" in
      i)
        input_file=${OPTARG}
        ;;
      o)
        output_file=${OPTARG}
        ;;
      h)
        echo "
Example : fastA2Q -i /<input file>/ -o /<output file>/
-i Input file
-o Output file
-h print Help
-v print fastA2Q version"
        exit
        ;;
      v)
        echo "fastA2Q version: ${version_def}"
        exit
        ;;
      :)
        echo "Option -${OPTARG} requires an argument" 1>&2
        usage
        ;;
      \?)
        echo "Unknown option: -${OPTARG}" 1>&2
        usage
        ;;
      *)
        # Catch-all; must stay last or it shadows the two arms above.
        usage
        ;;
    esac
  done
}

parse_args "$@"

shift $((OPTIND-1))

### check

# Test the value that -i actually produced. Checking OPTIND only tells us
# whether *any* option was given, so "-o out.fastq" on its own used to pass.
if [[ -z "${input_file}" ]]; then
  echo -e "\n#\tInput file not provided, Aborting!\n"
  usage
  exit
elif [[ ! -r "${input_file}" || -d "${input_file}" ]]; then
  # Fail here with a clear message rather than later, half-way through.
  echo -e "\n#\tInput file not readable: ${input_file}, Aborting!\n" 1>&2
  exit 1
else
  echo -e "\n#\tProcessing: ${input_file}\n"
fi

###

### Checking output_file

# Print the default output stem for the input path $1: the directory part is
# kept untouched and the file name is cut at its first dot, so that
# "./reads/sample.fasta" gives "./reads/sample". Dots in directory names no
# longer truncate the result.
default_output_stem() {
  local path=$1
  local base=${path##*/}
  local stem=${base%%.*}
  # A name that starts with a dot (e.g. ".fasta") has no stem; keep it whole.
  [[ -n "$stem" ]] || stem=$base
  if [[ "$path" == */* ]]; then
    printf '%s\n' "${path%/*}/${stem}"
  else
    printf '%s\n' "$stem"
  fi
}

output_file_def=$(default_output_stem "${input_file}")

# Without -o the result is written next to the input as <stem>.fastq.
if [[ "$output_file" == "" ]]; then
  output_file="${output_file_def}.fastq"
  echo -e "\n#\tUsing default output file: ${output_file_def}.fastq\n"
else
  echo -e "\n#\tOutput file: ${output_file}\n"
fi

###

# making a new file

# Print $2 random characters drawn from the tr(1) character set $1.
rand_chars() {
  # LC_ALL=C makes tr treat the random stream as plain bytes instead of trying
  # to decode it in the user's locale. head closes the pipe once it has enough,
  # so tr normally ends with SIGPIPE; that is expected.
  LC_ALL=C tr -dc "$1" < /dev/urandom | head -c "$2"
}

# Convert the FASTA file $1 into FASTQ records appended to the file $2.
#
# The lines of each FASTA record are joined into one sequence (surrounding
# blanks and a trailing carriage return are dropped, '>' header lines are
# discarded). Every sequence is then written as four lines: a randomly
# generated '@' header, the sequence, '+', and a random quality string of the
# same length as the sequence.
# Returns non-zero when $1 cannot be read or $2 cannot be opened for appending.
fasta_to_fastq() {
  local src=$1 dst=$2
  local instrument run_id flowcell lane
  local line size quality coords pair filtered control indexes

  if [[ ! -r "$src" || -d "$src" ]]; then
    echo "fastA2Q: cannot read input file: ${src}" 1>&2
    return 1
  fi

  ### making fastq header components (constant variables)

  instrument=$(rand_chars 'A-Z' 3)  # unique instrument name
  run_id=$(rand_chars 'A-Z' 5)      # run id
  flowcell=$(rand_chars 'A-Z0-9' 7) # flowcell id
  lane=$(rand_chars '0-9' 5)        # flowcell lane

  ###

  # The sequences are streamed straight from awk, so no temporary file is
  # needed. awk always ends its output lines with a newline, which keeps the
  # last record from being skipped by `read`.
  while IFS= read -r line; do
    size=${#line}
    (( size > 0 )) || continue

    # change your ASCII symbol if you want
    # but this is already phred score 20 and above ('5'..'I' in Phred+33)
    quality=$(rand_chars '5678<9:;=?ABC>DEFGHI' "$size")

    ### making fastq header components (variable variables)

    # One draw of 15 digits supplies three fields:
    #   tile number within the flowcell lane (4 digits),
    #   'x'-coordinate of the cluster within the tile (5 digits),
    #   'y'-coordinate of the cluster within the tile (6 digits)
    coords=$(rand_chars '0-9' 15)
    # the member of a pair, 1 or 2 (paired-end or mate-pair reads only)
    pair=$(rand_chars '12' 1)
    # Y if the read is filtered, N otherwise
    filtered=$(rand_chars 'YN' 1)
    # control bits
    control=$(rand_chars '06284' 2)
    # index 1 and index 2 sequences (8 bases each)
    indexes=$(rand_chars 'ATGC' 16)

    # The record is written whole. Trimming it with `cut -c -$size` clipped
    # the header line to the read length as well.
    printf '@%s:%s:%s:%s:%s:%s:%s %s:%s:%s:%s+%s\n%s\n+\n%s\n' \
      "$instrument" "$run_id" "$flowcell" "$lane" \
      "${coords:0:4}" "${coords:4:5}" "${coords:9:6}" \
      "$pair" "$filtered" "$control" \
      "${indexes:0:8}" "${indexes:8:8}" \
      "$line" "$quality"
  done < <(
    awk '
      { gsub(/^[ \t]+|[ \t\r]+$/, "") }
      /^>/ { if (seq != "") print seq; seq = ""; next }
      { seq = seq $0 }
      END { if (seq != "") print seq }
    ' < "$src"
  ) >> "$dst"
}

if [[ -n "${input_file:-}" ]]; then
  fasta_to_fastq "${input_file}" "${output_file}" || exit 1
else
  echo "fastA2Q: no input file given, nothing to convert" 1>&2
fi

#################

# File name of the output path without its directories, used only in the
# summary line below.
output_file_echo=${output_file##*/}

###

end=$(date +%s)          # end time of script
runtime=$((end - start)) # calculate runtime

###
echo -e "\n#\tProcessed >>>| $YELLOW${output_file_echo}$RESTORE |<<< in {$YELLOW${runtime} sec$RESTORE}\n\n###"
###

### End of script