-
Notifications
You must be signed in to change notification settings - Fork 9
/
step2-NEOcomputation_serial.sh
executable file
·59 lines (47 loc) · 2.28 KB
/
step2-NEOcomputation_serial.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env bash
# Step 2: run VEP annotation and pVACseq in a serial way (one VCF at a time).
#
# Must be started from the directory that holds inputArgs.txt: the settings
# file is looked up relative to the current working directory. This part of
# the script reads OUTPUT from it and defines sgvcf, pvacseqRes, filenames and
# annotated_dir for the processing loop that follows.
echo ------------------------------------
echo -- Generate neoantigens serially --
echo ------------------------------------
# load arguments
args_file="$(pwd)/inputArgs.txt"
if [[ ! -r "$args_file" ]]; then
  printf '\033[31m Cannot read %s; start this script from the directory that contains inputArgs.txt. \033[0m\n' "$args_file" >&2
  exit 1
fi
source "$args_file"
# An empty OUTPUT would silently turn every path below into a path under "/".
if [[ -z "$OUTPUT" ]]; then
  printf '\033[31m OUTPUT is not set in %s. \033[0m\n' "$args_file" >&2
  exit 1
fi
###################<<<<<<<<<<<<<<<<<<<<<< Begin
sgvcf=$OUTPUT/single_vcfs         # input single vcf file directory
pvacseqRes=$OUTPUT/pvacseqResults # output file directory of pvacseq
# make sure the output dir exists, otherwise create it
if [[ ! -d "$pvacseqRes" ]]; then
  echo -e "\033[33m The pvacseq results directory does not exist under $OUTPUT, we will create it. \033[0m"
  mkdir -p "$pvacseqRes" || exit 1
fi
# get all input vcf filenames (bare names, one per line, without the directory)
filenames=$(ls "$sgvcf" | grep -E '.*\.vcf$')
# directory caching the VEP-annotated VCFs that are fed to pVACseq
annotated_dir=$pvacseqRes/annot_dir
mkdir -p "$annotated_dir" || exit 1
#######################################
# Print the HLA allele string recorded for one sample.
# Arguments: $1 - sample ID
#            $2 - path to the HLA table (whitespace-separated columns)
# Outputs:   column 2 of the first row in which the sample ID appears as a
#            complete field; nothing when no row matches.
# The comparison is on whole fields, so sample "S1" never picks up the rows
# of "S10" or "S100" the way an unanchored grep would.
#######################################
lookup_hla() {
  awk -v id="$1" '{ for (i = 1; i <= NF; i++) if ($i == id) { print $2; exit } }' "$2"
}

echo -e "\033[32m Begin of VEP and pVACseq pipeline... \033[0m"

# Collect the input VCFs with a glob instead of parsing `ls`, so file names
# containing whitespace survive intact.
vcf_files=()
for vcf_path in "$sgvcf"/*.vcf; do
  # An unmatched glob stays literal; the existence test drops it.
  if [[ -e "$vcf_path" ]]; then
    vcf_files+=("$vcf_path")
  fi
done

if (( ${#vcf_files[@]} == 0 )); then
  printf '\033[33m No .vcf files found under %s, nothing to do. \033[0m\n' "$sgvcf" >&2
else
  # Activate the environment once for the whole run rather than per sample.
  # ${py_env:+...} passes no argument at all when py_env is empty, exactly as
  # the unquoted expansion did.
  source activate ${py_env:+"$py_env"}

  for vcf_path in "${vcf_files[@]}"; do
    filename=${vcf_path##*/}
    sampleID=${filename%.vcf}
    printf "\rprocess %s ..\n" "$sampleID"
    annotated_vcf=$annotated_dir/${sampleID}_annotated_filterd.vcf

    # step 1: annotate the sample with VEP.
    # NOTE: the PASS-only filter stage below is commented out, so despite its
    # name the output file is annotated but NOT filtered.
    # $vep_run is left unquoted on purpose: it may hold a command plus
    # arguments (e.g. an interpreter followed by the script path).
    if ! $vep_run --input_file "$vcf_path" --format vcf --output_file stdout \
      --vcf --symbol --terms SO --plugin Downstream --plugin Wildtype \
      --dir_plugins "$PATH_VEP_PLUGINS" --assembly "$assembly_version" \
      --fasta "$PATH_FASTA" --dir_cache "$CACHE_VEP" --offline \
      --cache_version 91 --pick --force_overwrite \
      > "$annotated_vcf"; then
      printf '\033[31m VEP failed for %s, skipping pVACseq for this sample. \033[0m\n' "$sampleID" >&2
      continue
    fi
    #| $vep_filter --format vcf --force_overwrite --filter "(FILTER is PASS)" --output_file $annotated_dir/$sampleID"_annotated_filterd.vcf"

    # step 2: run pvacseq
    hla_alleles=$(lookup_hla "$sampleID" "$PATH_HLA")
    if [[ -z "$hla_alleles" ]]; then
      printf '\033[31m No HLA entry for %s in %s, skipping pVACseq for this sample. \033[0m\n' "$sampleID" "$PATH_HLA" >&2
      continue
    fi
    # a directory can only store one sample result, hence one res_<sample> each
    res_dir=$pvacseqRes/res_$sampleID
    # $method is left unquoted on purpose: it may list several prediction
    # algorithms, each of which must reach pvacseq as its own argument.
    pvacseq run \
      "$annotated_vcf" \
      "$sampleID" \
      "$hla_alleles" \
      $method "$res_dir" \
      -e "$epitope_len" \
      -a sample_name \
      -d 500 \
      --iedb-install-directory "$PATH_MHC" \
      || printf '\033[31m pVACseq failed for %s. \033[0m\n' "$sampleID" >&2
  done

  source deactivate ${py_env:+"$py_env"}
fi
echo -e "\033[32m End of VEP and pVACseq pipeline... \033[0m"
###################>>>>>>>>>>>>>>>>>>>>>>> End