Skip to content

Commit

Permalink
minor correction
Browse files Browse the repository at this point in the history
  • Loading branch information
reJELIN committed Sep 26, 2023
1 parent 7588e7d commit 52247bc
Show file tree
Hide file tree
Showing 9 changed files with 25 additions and 32 deletions.
11 changes: 5 additions & 6 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ CONDA_DSB_ENV=str(PIPELINE_FOLDER)+"/envs/conda/environement_dsb.yaml"
CONFIG_FILE_PATH=sys.argv[6]
SING_IMG=str(PIPELINE_FOLDER)+"/envs/singularity/tree_building_container.simg"

lib_to_import=str(PIPELINE_FOLDER)+"/common/"
sys.path.append(lib_to_import)
from utils import *
#lib_to_import=str(PIPELINE_FOLDER)+"/common/"
#sys.path.append(lib_to_import)
#from utils import *

GLOBAL_TMP = config['tmp'] if 'tmp' in config else "/tmp"
if os.path.normpath(GLOBAL_TMP) != "/tmp" :
Expand All @@ -31,7 +31,7 @@ if os.path.normpath(GLOBAL_TMP) != "/tmp" :
sys.stderr.write(GLOBAL_TMP + " doesn't exist! Temporary directory is set to /tmp \n")
GLOBAL_TMP = "/tmp"

sample_list=[sample for sample in config["sample"] if sample in os.listdir(config["input_sample_path"])]
sample_list=[sample for sample in config["sample"]]

i_steps=config["steps"]

Expand All @@ -43,7 +43,7 @@ rule all:
message:
"pipeline goes all way long through your step(s)"

if "Alignment" in i_steps:
if "alignment" in i_steps:
include: "rules/alignment.smk"

if "filtering" in i_steps:
Expand All @@ -59,5 +59,4 @@ if "ALL" in i_steps:
include: "rules/ALL.smk"

if "phylogeny" in i_steps:

include: "rules/phylogeny.smk"
4 changes: 2 additions & 2 deletions rules/alignment.smk
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ rule create_panel:
time_min = (lambda wildcards, attempt: min(attempt * 10, 60))
shell:
"""
python3 {params.workflow_dir}/scripts/create_panel.py {params.config_file}
python3 {params.workflow_dir}/scripts/create_yaml.py {params.config_file}
"""


Expand All @@ -33,7 +33,7 @@ if config["type_analysis"] == "dna":
file_output=config["output_sample_path"]+"/{i_sample}"
shell:
"""
bash {params.workflow_dir}/scripts/run_dna_alignment.sh {params.file_output} {input.configfile_i}
bash {params.workflow_dir}/scripts/run_alignment_dna.sh {params.file_output} {input.configfile_i}
"""

if config["type_analysis"] == "dna+protein":
Expand Down
10 changes: 6 additions & 4 deletions rules/rule_all.smk
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,17 @@ def get_targets(steps):
except KeyError:
normalisation_list=["CLR"]

if "Alignment" in steps:
if "alignment" in steps:
if config["type_analysis"] == "dna":

target["Alignment"]=[
target["alignment"]=[
expand(config["input_sample_path"]+"/{i_sample}/{i_sample}_config_panel.yaml",i_sample=sample_list),
expand(config["output_sample_path"]+"/{i_sample}/results/{i_sample}.dna.h5",i_sample=sample_list)
]

if config["type_analysis"] == "dna+protein":
target["Alignment"]=[

target["alignment"]=[
expand(config["input_sample_path"]+"/{i_sample}/{i_sample}_config_panel.yaml",i_sample=sample_list),
expand(config["output_sample_path"]+"/{i_sample}/{i_sample}.dna+protein.h5",i_sample=sample_list)
]
Expand Down
2 changes: 1 addition & 1 deletion scripts/run_alignment_dna.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
source /mnt/beegfs/pipelines/single-cell_dna/tapestri_pipeline/v2/etc/profile.d/conda.sh
conda activate /mnt/beegfs/pipelines/single-cell_dna/tapestri_pipeline/v2
tapestri dna run --n-cores 24 --output-folder $1 --config $2 --overwrite
conda deactivate
conda deactivate
2 changes: 1 addition & 1 deletion scripts/run_alignment_dna_protein.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
source /mnt/beegfs/pipelines/single-cell_dna/tapestri_pipeline/v2/etc/profile.d/conda.sh
conda activate /mnt/beegfs/pipelines/single-cell_dna/tapestri_pipeline/v2
tapestri dna+protein run --output-folder $1 --config $2 --overwrite
conda deactivate
conda deactivate
5 changes: 1 addition & 4 deletions scripts/sc_all_assays.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,6 @@
sample.protein.cluster(attribute="umap",
method=prot_method_clustering,
**prot_res_clustering_method)

if prot_norm == "DSB":
prot_normsample.protein.layers["normalized_counts"]=sample_prot.protein.layers["normalized_counts_DSB_znorm"]


print("Making figures")
Expand Down Expand Up @@ -346,7 +343,7 @@
except KeyError:
chr_of_interest=None

if len(list_variants_of_interest) != None:
if list_variants_of_interest != None:
make_label_for_each_variants(sample,list_variants_of_interest)

cartesian_product_of_variants_of_interest=make_cartesian_product_of_variants(sample)
Expand Down
12 changes: 5 additions & 7 deletions scripts/sc_dna_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,18 +194,19 @@
df_vaf=pd.DataFrame(mean_percent_vaf,index=sample.dna.ids(),columns=["mean_vaf"])
variant_to_keep=list(df_vaf[df_vaf.mean_vaf <= int(max_vaf_percent)].index)

final_vars = list(set(list(variant_to_keep) + target_variants))
sample.dna = sample.dna[sample.dna.barcodes(), final_vars]

mio.save(sample=sample,path=args.output_h5,raw=False)

if bool_predict_missing_value == True:
print("\nImputing missing VAF & filtering germinal variants")
X=sample.dna.layers["AF_MISSING"]
X_nan = np.where(X==-50, np.nan, X)
imputer = KNNImputer(n_neighbors=5)
imputer = KNNImputer(n_neighbors=20)
imput_matrix=imputer.fit_transform(X_nan)
sample.dna.layers["AF_MISSING"]=imput_matrix

final_vars = list(set(list(variant_to_keep) + target_variants))
sample.dna = sample.dna[sample.dna.barcodes(), final_vars]

print("\nSaving Annotation")
annotation.to_csv(main_output_path+"dna/annotation/QC_annotation.csv")

Expand Down Expand Up @@ -243,9 +244,6 @@

annotation.to_csv(main_output_path+"dna/annotation/QC_advanced_annotation.csv")

mio.save(sample=sample,path=args.ouput_h5,raw=False)



chi_square_value,p_value=calculate_bartlett_sphericity(pd.DataFrame(sample.dna.layers["AF_MISSING"]))
chi_square_value, p_value
Expand Down
9 changes: 3 additions & 6 deletions scripts/sc_dna_prot.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@

sample=make_umap(sample,'protein')


for i_method in prot_clustering_method_list:
sample=make_clustering(assay=sample,arg_attribute_assay="protein",arg_method_clustering=i_method,arg_max_components=max_components)


mio.save(sample=sample,path=args.output_h5,raw=False)

for i_method in normalisation_list:
for i_method_clustering in prot_clustering_method_list:
str_path_result_path=config["output_sample_path"]+"/"+args.sample_name+"/prot/clustering/"+i_method+"/"+i_method_clustering+"/"
Expand All @@ -70,10 +70,7 @@
arg_max_components=max_components,
args_directory_result=str_path_result_path,
args_normalization=i_method)

fig=sample.protein.ridgeplot(attribute='normalized_counts_'+i_method,features=sample.protein.ids())

Path(config["output_sample_path"]+"/"+args.sample_name+"/prot/ridgleplot/").mkdir(parents=True, exist_ok=True)
fig.write_html(config["output_sample_path"]+"/"+args.sample_name+"/prot/ridgleplot/ridgleplot_+"+i_method_norm+".html")

mio.save(sample=sample,path=args.output_h5,raw=False)
fig.write_html(config["output_sample_path"]+"/"+args.sample_name+"/prot/ridgleplot/ridgleplot_+"+i_method_norm+".html")
2 changes: 1 addition & 1 deletion scripts/sc_dna_snv_cnv.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
# replace the -50 placeholder values with NaN so the imputer below fills them in
X_nan = np.where(X==-50, np.nan, X)
# K-nearest neighbors imputer
imputer = KNNImputer(n_neighbors=5)
imputer = KNNImputer(n_neighbors=20)
imput_matrix=imputer.fit_transform(X_nan)
# overwrite the AF_MISSING layer with the imputed matrix
sample.dna.layers["AF_MISSING"]=imput_matrix
Expand Down

0 comments on commit 52247bc

Please sign in to comment.