From d65991c9069312be9bd565c13c987525bf498cb5 Mon Sep 17 00:00:00 2001 From: TheOafidian Date: Mon, 5 Jun 2023 21:09:09 +0200 Subject: [PATCH 01/20] Initial script --- modules/nf-core/metabuli/classify/main.nf | 65 +++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 modules/nf-core/metabuli/classify/main.nf diff --git a/modules/nf-core/metabuli/classify/main.nf b/modules/nf-core/metabuli/classify/main.nf new file mode 100644 index 00000000000..e6c54a51f3c --- /dev/null +++ b/modules/nf-core/metabuli/classify/main.nf @@ -0,0 +1,65 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules/nf-core/ +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process METABULI_CLASSIFY { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::metabuli=1.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/metabuli:1.0.0--pl5321hf1761c0_0': + 'biocontainers/1.0.0--pl5321hf1761c0_0' }" + + input: + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. + tuple val(meta), path(fastas) + path(db) + + output: + tuple val(meta), path("*_classifications.tsv"), emit: classification + tuple val(meta), path("*_report.tsv"), emit: report + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input = meta.single_end ? "--seq_mode 1 ${fastas}" : "${fastas}" + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + """ + metabuli \\ + classify \\ + $args \\ + --threads $task.cpus \\ + ${input} + ${database} + ${prefix} \\ + ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + metabuli: \$(echo \$(metabuli 2>&1) | grep Version | sed 's/^metabuli Version: //;)) + END_VERSIONS + """ +} From 92cb0546ea4032478fccae16efc0fb617b452695 Mon Sep 17 00:00:00 2001 From: TheOafidian Date: Mon, 5 Jun 2023 21:24:11 +0200 Subject: [PATCH 02/20] Initial meta --- modules/nf-core/metabuli/classify/meta.yml | 55 ++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 modules/nf-core/metabuli/classify/meta.yml diff --git a/modules/nf-core/metabuli/classify/meta.yml 
b/modules/nf-core/metabuli/classify/meta.yml new file mode 100644 index 00000000000..ffae38def0b --- /dev/null +++ b/modules/nf-core/metabuli/classify/meta.yml @@ -0,0 +1,55 @@ +name: "metabuli_classify" +## TODO nf-core: Add a description of the module and list keywords +description: Classify FASTA files against a metabuli database +keywords: + - metabuli + - metagenomics + - profiling + - taxonomy + - k-mer + - classify + - classification +tools: + - "metabuli": + description: "Metabuli: specific and sensitive metagenomic classification via joint analysis of DNA and amino acid" + homepage: "https://github.com/steineggerlab/Metabuli" + documentation: "https://github.com/steineggerlab/Metabuli#readme" + tool_dev_url: "https://github.com/steineggerlab/Metabuli" + doi: "https://doi.org/10.1101/2023.05.31.543018" + licence: "['GPL v3']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastas: + type: file + description: single or paired FASTA files, optionally gzipped + pattern: "*.{fa,fa.gz,fasta,fasta.gz,fna,fna.gz}" + - db: + type: file + description: Metabuli databse file from build or custom made. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - classification: + type: file + description: Overview reads and their classification + pattern: "*_classifications.tsv" + - report: + type: file + description: Proportions of reads assigned to each taxon + pattern: "*_report.tsv" + +authors: + - "@TheOafidian" From 96669023a1e3f34bce53d6a921fc23c01a33f8d0 Mon Sep 17 00:00:00 2001 From: TheOafidian Date: Tue, 6 Jun 2023 01:27:51 +0200 Subject: [PATCH 03/20] Module only accepts fasta Use seqtk downstream to convert fastqs --- modules/nf-core/metabuli/classify/main.nf | 26 +--------------------- modules/nf-core/metabuli/classify/meta.yml | 13 +++++------ 2 files changed, 7 insertions(+), 32 deletions(-) diff --git a/modules/nf-core/metabuli/classify/main.nf b/modules/nf-core/metabuli/classify/main.nf index e6c54a51f3c..930f5e42287 100644 --- a/modules/nf-core/metabuli/classify/main.nf +++ b/modules/nf-core/metabuli/classify/main.nf @@ -1,20 +1,3 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. 
it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process METABULI_CLASSIFY { tag "$meta.id" label 'process_medium' @@ -25,8 +8,6 @@ process METABULI_CLASSIFY { 'biocontainers/1.0.0--pl5321hf1761c0_0' }" input: - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. tuple val(meta), path(fastas) path(db) @@ -42,18 +23,13 @@ process METABULI_CLASSIFY { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def input = meta.single_end ? "--seq_mode 1 ${fastas}" : "${fastas}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. 
https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive """ metabuli \\ classify \\ $args \\ --threads $task.cpus \\ ${input} - ${database} + ${db} ${prefix} \\ ${prefix} diff --git a/modules/nf-core/metabuli/classify/meta.yml b/modules/nf-core/metabuli/classify/meta.yml index ffae38def0b..9e515617a02 100644 --- a/modules/nf-core/metabuli/classify/meta.yml +++ b/modules/nf-core/metabuli/classify/meta.yml @@ -1,5 +1,4 @@ name: "metabuli_classify" -## TODO nf-core: Add a description of the module and list keywords description: Classify FASTA files against a metabuli database keywords: - metabuli @@ -15,7 +14,7 @@ tools: homepage: "https://github.com/steineggerlab/Metabuli" documentation: "https://github.com/steineggerlab/Metabuli#readme" tool_dev_url: "https://github.com/steineggerlab/Metabuli" - doi: "https://doi.org/10.1101/2023.05.31.543018" + doi: "10.1101/2023.05.31.543018" licence: "['GPL v3']" input: @@ -24,13 +23,13 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - fastas: + - fastas: type: file - description: single or paired FASTA files, optionally gzipped - pattern: "*.{fa,fa.gz,fasta,fasta.gz,fna,fna.gz}" - - db: + description: single or paired FASTA files + pattern: "*.{fa,fasta,fna}" + - db: type: file - description: Metabuli databse file from build or custom made. + description: Metabuli database file, prebuild or custom made. 
output: - meta: From 7a0d144c5e4eff6dd85313da980b45fbd6e852c9 Mon Sep 17 00:00:00 2001 From: TheOafidian Date: Tue, 6 Jun 2023 04:19:51 +0200 Subject: [PATCH 04/20] Start testing --- modules/nf-core/metabuli/classify/main.nf | 17 ++++--- tests/config/pytest_modules.yml | 4 ++ .../modules/nf-core/metabuli/classify/main.nf | 50 +++++++++++++++++++ .../nf-core/metabuli/classify/nextflow.config | 9 ++++ .../nf-core/metabuli/classify/test.yml | 13 +++++ 5 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 tests/modules/nf-core/metabuli/classify/main.nf create mode 100644 tests/modules/nf-core/metabuli/classify/nextflow.config create mode 100644 tests/modules/nf-core/metabuli/classify/test.yml diff --git a/modules/nf-core/metabuli/classify/main.nf b/modules/nf-core/metabuli/classify/main.nf index 930f5e42287..4ed582e7614 100644 --- a/modules/nf-core/metabuli/classify/main.nf +++ b/modules/nf-core/metabuli/classify/main.nf @@ -5,15 +5,15 @@ process METABULI_CLASSIFY { conda "bioconda::metabuli=1.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/metabuli:1.0.0--pl5321hf1761c0_0': - 'biocontainers/1.0.0--pl5321hf1761c0_0' }" + 'biocontainers/metabuli:1.0.0--pl5321hf1761c0_0' }" input: tuple val(meta), path(fastas) path(db) output: - tuple val(meta), path("*_classifications.tsv"), emit: classification - tuple val(meta), path("*_report.tsv"), emit: report + tuple val(meta), path("*/*_classifications.tsv"), emit: classification + tuple val(meta), path("*/*_report.tsv"), emit: report path "versions.yml" , emit: versions when: @@ -22,20 +22,21 @@ process METABULI_CLASSIFY { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def input = meta.single_end ? "--seq_mode 1 ${fastas}" : "${fastas}" + def input = meta.single_end ? 
"--seq-mode 1 ${fastas.baseName}" : "${fastas.each{file -> file.baseName}}" """ + gunzip *.gz metabuli \\ classify \\ $args \\ --threads $task.cpus \\ - ${input} - ${db} - ${prefix} \\ + ${input} \\ + ${db} \\ + ${prefix}_out \\ ${prefix} cat <<-END_VERSIONS > versions.yml "${task.process}": - metabuli: \$(echo \$(metabuli 2>&1) | grep Version | sed 's/^metabuli Version: //;)) + metabuli: \$(echo \$(metabuli 2>&1) | grep Version | sed 's/^metabuli Version: //';)) END_VERSIONS """ } diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index a090ee6fec6..595fbdb4c78 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -2266,6 +2266,10 @@ metabat2/metabat2: - modules/nf-core/metabat2/metabat2/** - tests/modules/nf-core/metabat2/metabat2/** +metabuli/classify: + - modules/nf-core/metabuli/classify/** + - tests/modules/nf-core/metabuli/classify/** + metaphlan/makedb: - modules/nf-core/metaphlan/makedb/** - tests/modules/nf-core/metaphlan/makedb/** diff --git a/tests/modules/nf-core/metabuli/classify/main.nf b/tests/modules/nf-core/metabuli/classify/main.nf new file mode 100644 index 00000000000..ba6ce18c668 --- /dev/null +++ b/tests/modules/nf-core/metabuli/classify/main.nf @@ -0,0 +1,50 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SEQTK_SEQ } from '../../../../../modules/nf-core/seqtk/seq/main.nf' +include { UNTAR } from '../../../../../modules/nf-core/untar/main.nf' +include { METABULI_CLASSIFY } from '../../../../../modules/nf-core/metabuli/classify/main.nf' + +// test with single end data +workflow test_metabuli_classify_se { + + input = [ + [ id:'test_se', single_end:true ], // meta map + [ + file("${params.test_data_base}/data/genomics/sarscov2/nanopore/fastq/test_2.fastq.gz", checkIfExists: true), + ] + ] + + db_archive = [ + [ id:'test_se'], // meta map + file("refseq_virus.tar.gz",checkIfExists: true) + 
//file("${params.test_data_base}/data/delete_me/metabuli/classify",checkIfExists: true) + ] + + UNTAR(db_archive) + SEQTK_SEQ(input) + METABULI_CLASSIFY ( SEQTK_SEQ.out.fastx , UNTAR.out.untar.map{it[1]}) +} + +// test with paired end data +workflow test_metabuli_classify_pe { + + input = [ + [ id:'test_pe', single_end:false ], // meta map + [ + file("${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), + file("${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true), + ] + ] + + db_archive = [ + [ id:'test_pe', single_end:false ], // meta map + file("refseq_virus.tar.gz",checkIfExists: true) + //file("${params.test_data_base}/data/delete_me/metabuli/classify",checkIfExists: true) + ] + + UNTAR(db_archive) + // TODO: transform both reads to fasta prior to classification + //METABULI_CLASSIFY ( (fastas) , UNTAR.out.untar.map{it[1]}) +} diff --git a/tests/modules/nf-core/metabuli/classify/nextflow.config b/tests/modules/nf-core/metabuli/classify/nextflow.config new file mode 100644 index 00000000000..d3c9e4d170c --- /dev/null +++ b/tests/modules/nf-core/metabuli/classify/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: SEQTK_SEQ { + ext.args = "-a" + } +} + diff --git a/tests/modules/nf-core/metabuli/classify/test.yml b/tests/modules/nf-core/metabuli/classify/test.yml new file mode 100644 index 00000000000..b6777c8fcbe --- /dev/null +++ b/tests/modules/nf-core/metabuli/classify/test.yml @@ -0,0 +1,13 @@ +## TODO nf-core: Please run the following command to build this file: +# nf-core modules create-test-yml metabuli/classify +- name: "metabuli classify" + command: nextflow run ./tests/modules/nf-core/metabuli/classify -entry test_metabuli_classify -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/metabuli/classify/nextflow.config + tags: + - 
"metabuli" + - "metabuli/classify" + files: + - path: "output/metabuli/test.bam" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: "output/metabuli/versions.yml" + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b + From 85a872031f31a050522e553e304b072f3dbcda3c Mon Sep 17 00:00:00 2001 From: Tim Date: Tue, 6 Jun 2023 19:28:00 +0200 Subject: [PATCH 05/20] Local test succeeded --- modules/nf-core/metabuli/classify/main.nf | 4 +- .../modules/nf-core/metabuli/classify/main.nf | 60 ++++++++++++------- .../nf-core/metabuli/classify/nextflow.config | 8 ++- .../nf-core/metabuli/classify/test.yml | 37 ++++++++---- 4 files changed, 75 insertions(+), 34 deletions(-) diff --git a/modules/nf-core/metabuli/classify/main.nf b/modules/nf-core/metabuli/classify/main.nf index 4ed582e7614..b2a824889ff 100644 --- a/modules/nf-core/metabuli/classify/main.nf +++ b/modules/nf-core/metabuli/classify/main.nf @@ -22,7 +22,7 @@ process METABULI_CLASSIFY { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def input = meta.single_end ? "--seq-mode 1 ${fastas.baseName}" : "${fastas.each{file -> file.baseName}}" + def input = meta.single_end ? 
"--seq-mode 1 ${fastas.baseName}" : "${fastas[0].baseName} ${fastas[1].baseName}" """ gunzip *.gz metabuli \\ @@ -36,7 +36,7 @@ process METABULI_CLASSIFY { cat <<-END_VERSIONS > versions.yml "${task.process}": - metabuli: \$(echo \$(metabuli 2>&1) | grep Version | sed 's/^metabuli Version: //';)) + metabuli: \$(metabuli | grep Version | sed 's/^metabuli Version: //';)) END_VERSIONS """ } diff --git a/tests/modules/nf-core/metabuli/classify/main.nf b/tests/modules/nf-core/metabuli/classify/main.nf index ba6ce18c668..ea47316a6a3 100644 --- a/tests/modules/nf-core/metabuli/classify/main.nf +++ b/tests/modules/nf-core/metabuli/classify/main.nf @@ -2,9 +2,13 @@ nextflow.enable.dsl = 2 -include { SEQTK_SEQ } from '../../../../../modules/nf-core/seqtk/seq/main.nf' +include { SEQTK_SEQ as SEQTK_SEQ_PE } from '../../../../../modules/nf-core/seqtk/seq/main.nf' +include { SEQTK_SEQ as SEQTK_SEQ_PE_RV } from '../../../../../modules/nf-core/seqtk/seq/main.nf' +include { SEQTK_SEQ as SEQTK_SEQ_SE } from '../../../../../modules/nf-core/seqtk/seq/main.nf' include { UNTAR } from '../../../../../modules/nf-core/untar/main.nf' -include { METABULI_CLASSIFY } from '../../../../../modules/nf-core/metabuli/classify/main.nf' +include { METABULI_CLASSIFY as METABULI_CLASSIFY_PE } from '../../../../../modules/nf-core/metabuli/classify/main.nf' +include { METABULI_CLASSIFY as METABULI_CLASSIFY_SE } from '../../../../../modules/nf-core/metabuli/classify/main.nf' + // test with single end data workflow test_metabuli_classify_se { @@ -16,35 +20,51 @@ workflow test_metabuli_classify_se { ] ] - db_archive = [ - [ id:'test_se'], // meta map - file("refseq_virus.tar.gz",checkIfExists: true) + db_archive = file("${params.localDir}/modules/refseq_virus.tar.gz",checkIfExists: true) //file("${params.test_data_base}/data/delete_me/metabuli/classify",checkIfExists: true) - ] - UNTAR(db_archive) - SEQTK_SEQ(input) - METABULI_CLASSIFY ( SEQTK_SEQ.out.fastx , UNTAR.out.untar.map{it[1]}) + UNTAR( [[:], 
db_archive]) + SEQTK_SEQ_SE(input) + METABULI_CLASSIFY_SE ( SEQTK_SEQ_SE.out.fastx , UNTAR.out.untar.map{it[1]}) } // test with paired end data workflow test_metabuli_classify_pe { - input = [ - [ id:'test_pe', single_end:false ], // meta map + input = Channel.from( + [[ id:'test_pe', single_end:false ], // meta map [ file("${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), file("${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true), - ] - ] + ]], + [[ id:'test_pe2', single_end:false ], // meta map + [ + file("${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), + file("${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true), + ]] + ) + - db_archive = [ - [ id:'test_pe', single_end:false ], // meta map - file("refseq_virus.tar.gz",checkIfExists: true) + db_archive = file("${params.localDir}/modules/refseq_virus.tar.gz",checkIfExists: true) + //TODO Replace with remote database (and make a smaller one) //file("${params.test_data_base}/data/delete_me/metabuli/classify",checkIfExists: true) - ] - UNTAR(db_archive) - // TODO: transform both reads to fasta prior to classification - //METABULI_CLASSIFY ( (fastas) , UNTAR.out.untar.map{it[1]}) + UNTAR([[:], db_archive]) + //transform pe reads to fasta prior to classification + + input.map{meta, reads -> [meta, reads[0]]} + .set{fw_reads} + + input.map{meta, reads -> [meta, reads[1]]} + .set{rv_reads} + + SEQTK_SEQ_PE(fw_reads) + + SEQTK_SEQ_PE_RV(rv_reads) + + fastas = SEQTK_SEQ_PE.out.fastx + .combine(SEQTK_SEQ_PE_RV.out.fastx, by: 0) + .map{meta, read1, read2 -> [meta, [read1, read2]]} + + METABULI_CLASSIFY_PE ( fastas , UNTAR.out.untar.map{it[1]}) } diff --git a/tests/modules/nf-core/metabuli/classify/nextflow.config b/tests/modules/nf-core/metabuli/classify/nextflow.config index d3c9e4d170c..86ef29a269e 100644 --- 
a/tests/modules/nf-core/metabuli/classify/nextflow.config +++ b/tests/modules/nf-core/metabuli/classify/nextflow.config @@ -1,9 +1,15 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - + localDir = "/mnt/d/tim/Projects" + withName: SEQTK_SEQ { ext.args = "-a" } + + withName: SEQTK_SEQ_PE_RV { + ext.args = "-a" + ext.prefix = "reverse" + } } diff --git a/tests/modules/nf-core/metabuli/classify/test.yml b/tests/modules/nf-core/metabuli/classify/test.yml index b6777c8fcbe..83d2927a9e9 100644 --- a/tests/modules/nf-core/metabuli/classify/test.yml +++ b/tests/modules/nf-core/metabuli/classify/test.yml @@ -1,13 +1,28 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml metabuli/classify -- name: "metabuli classify" - command: nextflow run ./tests/modules/nf-core/metabuli/classify -entry test_metabuli_classify -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/metabuli/classify/nextflow.config +- name: metabuli classify test_metabuli_classify_se + command: nextflow run ./tests/modules/nf-core/metabuli/classify -entry test_metabuli_classify_se -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/metabuli/classify/nextflow.config tags: - - "metabuli" - - "metabuli/classify" + - metabuli/classify + - metabuli files: - - path: "output/metabuli/test.bam" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: "output/metabuli/versions.yml" - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b - + - path: output/metabuli/test_se_out/test_se_classifications.tsv + contains: + - "2697049" + - path: output/metabuli/test_se_out/test_se_report.tsv + md5sum: 21efd0f1c3c9b988e43e9f6c7add67ef + - path: output/metabuli/versions.yml + +- name: metabuli classify test_metabuli_classify_pe + command: nextflow run ./tests/modules/nf-core/metabuli/classify -entry test_metabuli_classify_pe -c ./tests/config/nextflow.config -c 
./tests/modules/nf-core/metabuli/classify/nextflow.config + tags: + - metabuli/classify + - metabuli + files: + - path: output/metabuli/test_pe2_out/test_pe2_classifications.tsv + md5sum: 5c47d641e35afc8b57955b5110da72f9 + - path: output/metabuli/test_pe2_out/test_pe2_report.tsv + md5sum: 3bfc704ed233bd85b8a7cca89c0b121e + - path: output/metabuli/test_pe_out/test_pe_classifications.tsv + md5sum: 5c47d641e35afc8b57955b5110da72f9 + - path: output/metabuli/test_pe_out/test_pe_report.tsv + md5sum: 3bfc704ed233bd85b8a7cca89c0b121e + - path: output/metabuli/versions.yml From 648b58ef74358903dc456d02dd94a6884b220d82 Mon Sep 17 00:00:00 2001 From: Tim Date: Wed, 7 Jun 2023 11:14:28 +0200 Subject: [PATCH 06/20] Conditional gz --- modules/nf-core/metabuli/classify/main.nf | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/metabuli/classify/main.nf b/modules/nf-core/metabuli/classify/main.nf index b2a824889ff..ec4715810e0 100644 --- a/modules/nf-core/metabuli/classify/main.nf +++ b/modules/nf-core/metabuli/classify/main.nf @@ -22,9 +22,19 @@ process METABULI_CLASSIFY { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def input = meta.single_end ? "--seq-mode 1 ${fastas.baseName}" : "${fastas[0].baseName} ${fastas[1].baseName}" + def is_compressed = meta.single_end ? fastas.getName().endsWith(".gz") : fastas[0].getName().endsWith(".gz") + def input = meta.single_end ? 
"--seq-mode 1 ${fastas}" : "${fastas[0]} ${fastas[1]}" + if (is_compressed && meta.single_end) { + input = "--seq-mode 1 ${fastas.baseName}" + } else if (is_compressed) { + input = "${fastas[0].baseName} ${fastas[1].baseName}" + } + """ - gunzip *.gz + if [ "$is_compressed" == "true" ]; then + gzip -d *.gz + fi + metabuli \\ classify \\ $args \\ From d2212c1b42ab82bab864a19a1af3e551577cfed8 Mon Sep 17 00:00:00 2001 From: Tim Date: Wed, 7 Jun 2023 11:46:01 +0200 Subject: [PATCH 07/20] Replaced with minimal db --- tests/modules/nf-core/metabuli/classify/test.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/modules/nf-core/metabuli/classify/test.yml b/tests/modules/nf-core/metabuli/classify/test.yml index 83d2927a9e9..a7bd8f85d64 100644 --- a/tests/modules/nf-core/metabuli/classify/test.yml +++ b/tests/modules/nf-core/metabuli/classify/test.yml @@ -5,10 +5,9 @@ - metabuli files: - path: output/metabuli/test_se_out/test_se_classifications.tsv - contains: - - "2697049" + md5sum: 3870c45887908e8e4b3c60c23ae3e008 - path: output/metabuli/test_se_out/test_se_report.tsv - md5sum: 21efd0f1c3c9b988e43e9f6c7add67ef + md5sum: 686930217102cd8d249906387de3e6b5 - path: output/metabuli/versions.yml - name: metabuli classify test_metabuli_classify_pe @@ -18,11 +17,11 @@ - metabuli files: - path: output/metabuli/test_pe2_out/test_pe2_classifications.tsv - md5sum: 5c47d641e35afc8b57955b5110da72f9 + md5sum: d3b961b3a1e3f1181d182583ee246523 - path: output/metabuli/test_pe2_out/test_pe2_report.tsv - md5sum: 3bfc704ed233bd85b8a7cca89c0b121e + md5sum: 0cc7f995f15304368574b4217cdd13b9 - path: output/metabuli/test_pe_out/test_pe_classifications.tsv - md5sum: 5c47d641e35afc8b57955b5110da72f9 + md5sum: d3b961b3a1e3f1181d182583ee246523 - path: output/metabuli/test_pe_out/test_pe_report.tsv - md5sum: 3bfc704ed233bd85b8a7cca89c0b121e + md5sum: 0cc7f995f15304368574b4217cdd13b9 - path: output/metabuli/versions.yml From e4ea479d5a4c3285ba1f3764b8deba518fcb67f1 
Mon Sep 17 00:00:00 2001 From: Tim Date: Sun, 11 Jun 2023 04:06:01 +0200 Subject: [PATCH 08/20] Add metabuli build --- modules/nf-core/metabuli/build/main.nf | 53 +++++++++++++++++++ modules/nf-core/metabuli/build/meta.yml | 48 +++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/nf-core/metabuli/build/main.nf | 47 ++++++++++++++++ .../nf-core/metabuli/build/nextflow.config | 5 ++ 5 files changed, 157 insertions(+) create mode 100644 modules/nf-core/metabuli/build/main.nf create mode 100644 modules/nf-core/metabuli/build/meta.yml create mode 100644 tests/modules/nf-core/metabuli/build/main.nf create mode 100644 tests/modules/nf-core/metabuli/build/nextflow.config diff --git a/modules/nf-core/metabuli/build/main.nf b/modules/nf-core/metabuli/build/main.nf new file mode 100644 index 00000000000..e938e442641 --- /dev/null +++ b/modules/nf-core/metabuli/build/main.nf @@ -0,0 +1,53 @@ + +process METABULI_BUILD { + tag 'build' + label 'process_single' + + conda "bioconda::metabuli=1.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/metabuli:1.0.0--pl5321hf1761c0_0': + 'biocontainers/metabuli:1.0.0--pl5321hf1761c0_0' }" + + input: + path(genomes) + path(acc2taxid) + path(db) + + output: + path "metabuli_db.tar.gz", emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args_lib = task.ext.args_lib ?: '' + """ + ls $genomes > fastas.txt + metabuli \\ + add-to-library \\ + fastas.txt \\ + $acc2taxid \\ + $db + $args_lib + + ls $db/library > lib.txt + metabuli \\ + build \\ + --threads $task.cpus \\ + $db \\ + lib.txt \\ + $acc2taxid \\ + $args + + mkdir metabuli_db + mv $db/!(*y) metabuli_db + tar -czf "${db}.tar.gz" metabuli_db + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + metabuli: \$(metabuli | grep Version | sed 's/^metabuli Version: //';)) + END_VERSIONS + """ +} diff --git a/modules/nf-core/metabuli/build/meta.yml b/modules/nf-core/metabuli/build/meta.yml new file mode 100644 index 00000000000..b4e0b5d04e5 --- /dev/null +++ b/modules/nf-core/metabuli/build/meta.yml @@ -0,0 +1,48 @@ +name: "metabuli_build" +description: Build a database needed to use the metabuli classification tool +keywords: + - metabuli + - metagenomics + - profiling + - taxonomy + - database +tools: + - "metabuli": + description: "Metabuli: specific and sensitive metagenomic classification via joint analysis of DNA and amino acid" + homepage: "https://github.com/steineggerlab/Metabuli" + documentation: "https://github.com/steineggerlab/Metabuli#readme" + tool_dev_url: "https://github.com/steineggerlab/Metabuli" + doi: "10.1101/2023.05.31.543018" + licence: "['GPL v3']" + +input: + - genomes: + type: file + description: Genome files that serve as entries in the database + pattern: "*.{fa,fasta,fna}" + - acc2taxid: + type: file + description: | + Accession number to taxid file. + A four column tsv with the following header: + accession, accession.revision, taxid, gi. 
+ - db: + type: directory + description: | + Output folder name for the database. + If the taxonomy-path optional parameter is not set to a directory + containing names.dmp and nodes.dmp, this directory needs + to contain a folder named taxonomy containing those two files. + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - db: + type: file + description: Compressed metabuli database + pattern: "*.tar.gz" + +authors: + - "@TheOafidian" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 595fbdb4c78..2ab1ac941d0 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -2266,6 +2266,10 @@ metabat2/metabat2: - modules/nf-core/metabat2/metabat2/** - tests/modules/nf-core/metabat2/metabat2/** +metabuli/build: + - modules/nf-core/metabuli/build/** + - tests/modules/nf-core/metabuli/build/** + metabuli/classify: - modules/nf-core/metabuli/classify/** - tests/modules/nf-core/metabuli/classify/** diff --git a/tests/modules/nf-core/metabuli/build/main.nf b/tests/modules/nf-core/metabuli/build/main.nf new file mode 100644 index 00000000000..7e7e7cfdf69 --- /dev/null +++ b/tests/modules/nf-core/metabuli/build/main.nf @@ -0,0 +1,47 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { METABULI_BUILD } from '../../../../../modules/nf-core/metabuli/build/main.nf' + +process BUILD_ACC2TAXID { + input: + path(genomes) + output: + path(acc2taxid) + + script: + """ + echo -e "accession\taccession.version\ttaxid\tgi" > acc2taxid + accessionv=\$(cat ${genomes} | head -n1 | + cut -d " " -f1 | sed 's/>//') + echo -e "\${accessionv%.*}\t\$accessionv\t2697049\t111" >> acc2taxid + """ +} + +process CREATE_TAXONOMY_FOLDER{ + input: + path(dmpfiles) + + output: + path(db) + + script: + """ + mkdir -p db/taxonomy + mv *.dmp db/taxonomy + """ + +} + +workflow test_metabuli_build { + + genome = 
file("${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + dmp_files = [ + file("${params.test_data_base}/data/genomics/sarscov2/metagenome/names.dmp"), + file("${params.test_data_base}/data/genomics/sarscov2/metagenome/nodes.dmp") + ] + acc2taxid = BUILD_ACC2TAXID(genome) + tax = CREATE_TAXONOMY_FOLDER(dmp_files) + METABULI_BUILD ( genome, acc2taxid, tax ) +} diff --git a/tests/modules/nf-core/metabuli/build/nextflow.config b/tests/modules/nf-core/metabuli/build/nextflow.config new file mode 100644 index 00000000000..50f50a7a357 --- /dev/null +++ b/tests/modules/nf-core/metabuli/build/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file From 3947e0d53cba176234537ee8f3cd1d561b9a08dd Mon Sep 17 00:00:00 2001 From: Tim Date: Sun, 11 Jun 2023 21:11:47 +0200 Subject: [PATCH 09/20] Add build module and tests --- modules/nf-core/metabuli/build/main.nf | 13 +++++++------ tests/modules/nf-core/metabuli/build/main.nf | 5 +++-- .../modules/nf-core/metabuli/build/nextflow.config | 9 ++++++++- tests/modules/nf-core/metabuli/build/test.yml | 8 ++++++++ 4 files changed, 26 insertions(+), 9 deletions(-) create mode 100644 tests/modules/nf-core/metabuli/build/test.yml diff --git a/modules/nf-core/metabuli/build/main.nf b/modules/nf-core/metabuli/build/main.nf index e938e442641..50f257b3de2 100644 --- a/modules/nf-core/metabuli/build/main.nf +++ b/modules/nf-core/metabuli/build/main.nf @@ -1,7 +1,7 @@ process METABULI_BUILD { tag 'build' - label 'process_single' + label 'process_medium' conda "bioconda::metabuli=1.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
@@ -23,6 +23,7 @@ process METABULI_BUILD { script: def args = task.ext.args ?: '' def args_lib = task.ext.args_lib ?: '' + def skip_lib = params.skip_lib ?: false """ ls $genomes > fastas.txt metabuli \\ @@ -32,7 +33,7 @@ process METABULI_BUILD { $db $args_lib - ls $db/library > lib.txt + ls $db/library/* > lib.txt metabuli \\ build \\ --threads $task.cpus \\ @@ -40,10 +41,10 @@ process METABULI_BUILD { lib.txt \\ $acc2taxid \\ $args - - mkdir metabuli_db - mv $db/!(*y) metabuli_db - tar -czf "${db}.tar.gz" metabuli_db + + rm -r $db/library + tar -czf metabuli_db.tar.gz $db + rm -r $db cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/tests/modules/nf-core/metabuli/build/main.nf b/tests/modules/nf-core/metabuli/build/main.nf index 7e7e7cfdf69..89075ead7d9 100644 --- a/tests/modules/nf-core/metabuli/build/main.nf +++ b/tests/modules/nf-core/metabuli/build/main.nf @@ -30,6 +30,7 @@ process CREATE_TAXONOMY_FOLDER{ """ mkdir -p db/taxonomy mv *.dmp db/taxonomy + touch db/taxonomy/merged.dmp """ } @@ -38,8 +39,8 @@ workflow test_metabuli_build { genome = file("${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta", checkIfExists: true) dmp_files = [ - file("${params.test_data_base}/data/genomics/sarscov2/metagenome/names.dmp"), - file("${params.test_data_base}/data/genomics/sarscov2/metagenome/nodes.dmp") + file("${params.test_data_metabuli}/data/genomics/sarscov2/metagenome/names.dmp"), + file("${params.test_data_metabuli}/data/genomics/sarscov2/metagenome/nodes.dmp") ] acc2taxid = BUILD_ACC2TAXID(genome) tax = CREATE_TAXONOMY_FOLDER(dmp_files) diff --git a/tests/modules/nf-core/metabuli/build/nextflow.config b/tests/modules/nf-core/metabuli/build/nextflow.config index 50f50a7a357..7c8e76ebcbf 100644 --- a/tests/modules/nf-core/metabuli/build/nextflow.config +++ b/tests/modules/nf-core/metabuli/build/nextflow.config @@ -1,5 +1,12 @@ +params { + test_data_metabuli = 'https://raw.githubusercontent.com/nf-core/test-datasets/metabuli' +} 
process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - + withLabel:process_medium { + cpus = 6 + memory = 36.GB + time = 8.h + } } \ No newline at end of file diff --git a/tests/modules/nf-core/metabuli/build/test.yml b/tests/modules/nf-core/metabuli/build/test.yml new file mode 100644 index 00000000000..d2143c98872 --- /dev/null +++ b/tests/modules/nf-core/metabuli/build/test.yml @@ -0,0 +1,8 @@ +- name: metabuli build test_metabuli_build + command: nextflow run ./tests/modules/nf-core/metabuli/build -entry test_metabuli_build -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/metabuli/build/nextflow.config + tags: + - metabuli + - metabuli/build + files: + - path: output/metabuli/metabuli_db.tar.gz + - path: output/metabuli/versions.yml From 15cadac7853f9addbb17f974623dc4c3075743d0 Mon Sep 17 00:00:00 2001 From: Tim Date: Thu, 15 Jun 2023 11:43:19 +0200 Subject: [PATCH 10/20] Use modules branch of testdata --- tests/modules/nf-core/metabuli/build/main.nf | 4 ++-- tests/modules/nf-core/metabuli/build/nextflow.config | 9 ++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/modules/nf-core/metabuli/build/main.nf b/tests/modules/nf-core/metabuli/build/main.nf index 89075ead7d9..c6250b23d11 100644 --- a/tests/modules/nf-core/metabuli/build/main.nf +++ b/tests/modules/nf-core/metabuli/build/main.nf @@ -39,8 +39,8 @@ workflow test_metabuli_build { genome = file("${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta", checkIfExists: true) dmp_files = [ - file("${params.test_data_metabuli}/data/genomics/sarscov2/metagenome/names.dmp"), - file("${params.test_data_metabuli}/data/genomics/sarscov2/metagenome/nodes.dmp") + file("${params.test_data_base}/delete_me/metabuli/names.dmp"), + file("${params.test_data_base}/delete_me/metabuli/nodes.dmp") ] acc2taxid = BUILD_ACC2TAXID(genome) tax = CREATE_TAXONOMY_FOLDER(dmp_files) diff --git 
a/tests/modules/nf-core/metabuli/build/nextflow.config b/tests/modules/nf-core/metabuli/build/nextflow.config index 7c8e76ebcbf..c070c5fe3cf 100644 --- a/tests/modules/nf-core/metabuli/build/nextflow.config +++ b/tests/modules/nf-core/metabuli/build/nextflow.config @@ -1,12 +1,7 @@ -params { - test_data_metabuli = 'https://raw.githubusercontent.com/nf-core/test-datasets/metabuli' -} process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } withLabel:process_medium { - cpus = 6 - memory = 36.GB - time = 8.h + memory = 6.5GB } -} \ No newline at end of file +} From d36fb27513622af9c679315c8547e08a9391b6e4 Mon Sep 17 00:00:00 2001 From: Tim Date: Fri, 16 Jun 2023 00:23:40 +0200 Subject: [PATCH 11/20] Build database from scratch --- .../modules/nf-core/metabuli/classify/main.nf | 27 ++++++++++++++----- .../nf-core/metabuli/classify/nextflow.config | 4 ++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/tests/modules/nf-core/metabuli/classify/main.nf b/tests/modules/nf-core/metabuli/classify/main.nf index ea47316a6a3..d18f80abaca 100644 --- a/tests/modules/nf-core/metabuli/classify/main.nf +++ b/tests/modules/nf-core/metabuli/classify/main.nf @@ -8,6 +8,23 @@ include { SEQTK_SEQ as SEQTK_SEQ_SE } from '../../../../../modules/nf-core/seqtk include { UNTAR } from '../../../../../modules/nf-core/untar/main.nf' include { METABULI_CLASSIFY as METABULI_CLASSIFY_PE } from '../../../../../modules/nf-core/metabuli/classify/main.nf' include { METABULI_CLASSIFY as METABULI_CLASSIFY_SE } from '../../../../../modules/nf-core/metabuli/classify/main.nf' +include { BUILD_ACC2TAXID, CREATE_TAXONOMY_FOLDER } from '../build' +include { METABULI_BUILD } from '../../../../../modules/nf-core/metabuli/build/main.nf' + +workflow create_db { + genome = file("${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + dmp_files = [ + file("${params.test_data_base}/delete_me/metabuli/names.dmp"), 
+ file("${params.test_data_base}/delete_me/metabuli/nodes.dmp") + ] + acc2taxid = BUILD_ACC2TAXID(genome) + tax = CREATE_TAXONOMY_FOLDER(dmp_files) + METABULI_BUILD ( genome, acc2taxid, tax ) + + emit: + METABULI_BUILD.out +} + // test with single end data @@ -19,10 +36,8 @@ workflow test_metabuli_classify_se { file("${params.test_data_base}/data/genomics/sarscov2/nanopore/fastq/test_2.fastq.gz", checkIfExists: true), ] ] - - db_archive = file("${params.localDir}/modules/refseq_virus.tar.gz",checkIfExists: true) - //file("${params.test_data_base}/data/delete_me/metabuli/classify",checkIfExists: true) - + + db_archive = create_db().out.db UNTAR( [[:], db_archive]) SEQTK_SEQ_SE(input) METABULI_CLASSIFY_SE ( SEQTK_SEQ_SE.out.fastx , UNTAR.out.untar.map{it[1]}) @@ -45,9 +60,7 @@ workflow test_metabuli_classify_pe { ) - db_archive = file("${params.localDir}/modules/refseq_virus.tar.gz",checkIfExists: true) - //TODO Replace with remote database (and make a smaller one) - //file("${params.test_data_base}/data/delete_me/metabuli/classify",checkIfExists: true) + db_archive = create_db().out.db UNTAR([[:], db_archive]) //transform pe reads to fasta prior to classification diff --git a/tests/modules/nf-core/metabuli/classify/nextflow.config b/tests/modules/nf-core/metabuli/classify/nextflow.config index 86ef29a269e..d83f1b3fd12 100644 --- a/tests/modules/nf-core/metabuli/classify/nextflow.config +++ b/tests/modules/nf-core/metabuli/classify/nextflow.config @@ -1,8 +1,10 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - localDir = "/mnt/d/tim/Projects" + withLabel:process_medium { + memory = 6.5GB + } withName: SEQTK_SEQ { ext.args = "-a" } From e839656c51088a8a7d89d658651a424330ccd89d Mon Sep 17 00:00:00 2001 From: TheOaphidian Date: Wed, 21 Jun 2023 12:53:19 +0200 Subject: [PATCH 12/20] Adapt location test dumpfiles --- modules/nf-core/metabuli/classify/meta.yml | 2 +- 
tests/modules/nf-core/metabuli/build/main.nf | 4 +-- .../nf-core/metabuli/build/nextflow.config | 5 ++-- .../modules/nf-core/metabuli/classify/main.nf | 27 +++++++------------ .../nf-core/metabuli/classify/nextflow.config | 2 +- 5 files changed, 17 insertions(+), 23 deletions(-) diff --git a/modules/nf-core/metabuli/classify/meta.yml b/modules/nf-core/metabuli/classify/meta.yml index 9e515617a02..0d8b2004023 100644 --- a/modules/nf-core/metabuli/classify/meta.yml +++ b/modules/nf-core/metabuli/classify/meta.yml @@ -26,7 +26,7 @@ input: - fastas: type: file description: single or paired FASTA files - pattern: "*.{fa,fasta,fna}" + pattern: "*.{fa,fasta,fna,fq,fastq}" - db: type: file description: Metabuli database file, prebuild or custom made. diff --git a/tests/modules/nf-core/metabuli/build/main.nf b/tests/modules/nf-core/metabuli/build/main.nf index c6250b23d11..b9f2617e29b 100644 --- a/tests/modules/nf-core/metabuli/build/main.nf +++ b/tests/modules/nf-core/metabuli/build/main.nf @@ -39,8 +39,8 @@ workflow test_metabuli_build { genome = file("${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta", checkIfExists: true) dmp_files = [ - file("${params.test_data_base}/delete_me/metabuli/names.dmp"), - file("${params.test_data_base}/delete_me/metabuli/nodes.dmp") + file("${params.test_data_base}/data/delete_me/metabuli/names.dmp"), + file("${params.test_data_base}/data/delete_me/metabuli/nodes.dmp") ] acc2taxid = BUILD_ACC2TAXID(genome) tax = CREATE_TAXONOMY_FOLDER(dmp_files) diff --git a/tests/modules/nf-core/metabuli/build/nextflow.config b/tests/modules/nf-core/metabuli/build/nextflow.config index c070c5fe3cf..90fe74d6bbd 100644 --- a/tests/modules/nf-core/metabuli/build/nextflow.config +++ b/tests/modules/nf-core/metabuli/build/nextflow.config @@ -1,7 +1,8 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withLabel:process_medium { - memory = 6.5GB + memory = 6.5.GB } -} +} diff --git 
a/tests/modules/nf-core/metabuli/classify/main.nf b/tests/modules/nf-core/metabuli/classify/main.nf index d18f80abaca..b8fbfa70b87 100644 --- a/tests/modules/nf-core/metabuli/classify/main.nf +++ b/tests/modules/nf-core/metabuli/classify/main.nf @@ -8,9 +8,10 @@ include { SEQTK_SEQ as SEQTK_SEQ_SE } from '../../../../../modules/nf-core/seqtk include { UNTAR } from '../../../../../modules/nf-core/untar/main.nf' include { METABULI_CLASSIFY as METABULI_CLASSIFY_PE } from '../../../../../modules/nf-core/metabuli/classify/main.nf' include { METABULI_CLASSIFY as METABULI_CLASSIFY_SE } from '../../../../../modules/nf-core/metabuli/classify/main.nf' -include { BUILD_ACC2TAXID, CREATE_TAXONOMY_FOLDER } from '../build' +include { BUILD_ACC2TAXID; CREATE_TAXONOMY_FOLDER } from '../build/main.nf' include { METABULI_BUILD } from '../../../../../modules/nf-core/metabuli/build/main.nf' + workflow create_db { genome = file("${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta", checkIfExists: true) dmp_files = [ @@ -22,11 +23,10 @@ workflow create_db { METABULI_BUILD ( genome, acc2taxid, tax ) emit: - METABULI_BUILD.out + METABULI_BUILD.out.db } - // test with single end data workflow test_metabuli_classify_se { @@ -37,10 +37,11 @@ workflow test_metabuli_classify_se { ] ] - db_archive = create_db().out.db + create_db() + db_archive = create_db.out + db_archive = METABULI_BUILD.out.db UNTAR( [[:], db_archive]) - SEQTK_SEQ_SE(input) - METABULI_CLASSIFY_SE ( SEQTK_SEQ_SE.out.fastx , UNTAR.out.untar.map{it[1]}) + METABULI_CLASSIFY_SE ( input , UNTAR.out.untar.map{it[1]}) } // test with paired end data @@ -59,8 +60,8 @@ workflow test_metabuli_classify_pe { ]] ) - - db_archive = create_db().out.db + create_db() + db_archive = create_db.out UNTAR([[:], db_archive]) //transform pe reads to fasta prior to classification @@ -71,13 +72,5 @@ workflow test_metabuli_classify_pe { input.map{meta, reads -> [meta, reads[1]]} .set{rv_reads} - SEQTK_SEQ_PE(fw_reads) - - 
SEQTK_SEQ_PE_RV(rv_reads) - - fastas = SEQTK_SEQ_PE.out.fastx - .combine(SEQTK_SEQ_PE_RV.out.fastx, by: 0) - .map{meta, read1, read2 -> [meta, [read1, read2]]} - - METABULI_CLASSIFY_PE ( fastas , UNTAR.out.untar.map{it[1]}) + METABULI_CLASSIFY_PE ( input , UNTAR.out.untar.map{it[1]}) } diff --git a/tests/modules/nf-core/metabuli/classify/nextflow.config b/tests/modules/nf-core/metabuli/classify/nextflow.config index d83f1b3fd12..5b77ae739ad 100644 --- a/tests/modules/nf-core/metabuli/classify/nextflow.config +++ b/tests/modules/nf-core/metabuli/classify/nextflow.config @@ -3,7 +3,7 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } withLabel:process_medium { - memory = 6.5GB + memory = 6.5.GB } withName: SEQTK_SEQ { ext.args = "-a" From c409189852105df63704e586e9716ef34aef5329 Mon Sep 17 00:00:00 2001 From: TheOaphidian Date: Wed, 21 Jun 2023 13:45:22 +0200 Subject: [PATCH 13/20] Run stub for test metabuli build requires too much RAM (approx. 12GB) to feasibly run on GH CI containers --- modules/nf-core/metabuli/build/main.nf | 9 +++++++++ .../modules/nf-core/metabuli/build/nextflow.config | 3 --- tests/modules/nf-core/metabuli/build/test.yml | 13 +++++++++++-- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/modules/nf-core/metabuli/build/main.nf b/modules/nf-core/metabuli/build/main.nf index 50f257b3de2..b7870d1b14f 100644 --- a/modules/nf-core/metabuli/build/main.nf +++ b/modules/nf-core/metabuli/build/main.nf @@ -51,4 +51,13 @@ process METABULI_BUILD { metabuli: \$(metabuli | grep Version | sed 's/^metabuli Version: //';)) END_VERSIONS """ + + stub: + """ + touch metabuli_db.tar.gz + cat <<-END_VERSIONS > versions.yml + "${task.process}": + metabuli: \$(metabuli | grep Version | sed 's/^metabuli Version: //';)) + END_VERSIONS + """ } diff --git a/tests/modules/nf-core/metabuli/build/nextflow.config b/tests/modules/nf-core/metabuli/build/nextflow.config index 90fe74d6bbd..487e5dd5d41
100644 --- a/tests/modules/nf-core/metabuli/build/nextflow.config +++ b/tests/modules/nf-core/metabuli/build/nextflow.config @@ -2,7 +2,4 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - withLabel:process_medium { - memory = 6.5.GB - } } diff --git a/tests/modules/nf-core/metabuli/build/test.yml b/tests/modules/nf-core/metabuli/build/test.yml index d2143c98872..aadaa1b9279 100644 --- a/tests/modules/nf-core/metabuli/build/test.yml +++ b/tests/modules/nf-core/metabuli/build/test.yml @@ -1,8 +1,17 @@ - name: metabuli build test_metabuli_build - command: nextflow run ./tests/modules/nf-core/metabuli/build -entry test_metabuli_build -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/metabuli/build/nextflow.config + command: nextflow run ./tests/modules/nf-core/metabuli/build -entry test_metabuli_build -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/metabuli/build/nextflow.config -stub-run tags: - - metabuli - metabuli/build + - metabuli files: + - path: output/build/acc2taxid + md5sum: 4ae5ac3b865cb7bf301c3cc72153a7a8 + - path: output/create/db/taxonomy/merged.dmp + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/create/db/taxonomy/names.dmp + md5sum: 1cacf8dba5defe61c7b31dc4342801a0 + - path: output/create/db/taxonomy/nodes.dmp + md5sum: a41041a713e9fb2be5eac4723a421385 - path: output/metabuli/metabuli_db.tar.gz + md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/metabuli/versions.yml From e67e9f2dd196dda2219b2cb5519cb788615ffe13 Mon Sep 17 00:00:00 2001 From: TheOaphidian Date: Wed, 21 Jun 2023 13:52:23 +0200 Subject: [PATCH 14/20] Rewrite to use existing database --- .../modules/nf-core/metabuli/classify/main.nf | 39 ++----------------- 1 file changed, 3 insertions(+), 36 deletions(-) diff --git a/tests/modules/nf-core/metabuli/classify/main.nf b/tests/modules/nf-core/metabuli/classify/main.nf index b8fbfa70b87..a2366ad0c42 100644 --- 
a/tests/modules/nf-core/metabuli/classify/main.nf +++ b/tests/modules/nf-core/metabuli/classify/main.nf @@ -2,30 +2,9 @@ nextflow.enable.dsl = 2 -include { SEQTK_SEQ as SEQTK_SEQ_PE } from '../../../../../modules/nf-core/seqtk/seq/main.nf' -include { SEQTK_SEQ as SEQTK_SEQ_PE_RV } from '../../../../../modules/nf-core/seqtk/seq/main.nf' -include { SEQTK_SEQ as SEQTK_SEQ_SE } from '../../../../../modules/nf-core/seqtk/seq/main.nf' include { UNTAR } from '../../../../../modules/nf-core/untar/main.nf' include { METABULI_CLASSIFY as METABULI_CLASSIFY_PE } from '../../../../../modules/nf-core/metabuli/classify/main.nf' include { METABULI_CLASSIFY as METABULI_CLASSIFY_SE } from '../../../../../modules/nf-core/metabuli/classify/main.nf' -include { BUILD_ACC2TAXID; CREATE_TAXONOMY_FOLDER } from '../build/main.nf' -include { METABULI_BUILD } from '../../../../../modules/nf-core/metabuli/build/main.nf' - - -workflow create_db { - genome = file("${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta", checkIfExists: true) - dmp_files = [ - file("${params.test_data_base}/delete_me/metabuli/names.dmp"), - file("${params.test_data_base}/delete_me/metabuli/nodes.dmp") - ] - acc2taxid = BUILD_ACC2TAXID(genome) - tax = CREATE_TAXONOMY_FOLDER(dmp_files) - METABULI_BUILD ( genome, acc2taxid, tax ) - - emit: - METABULI_BUILD.out.db -} - // test with single end data workflow test_metabuli_classify_se { @@ -36,10 +15,8 @@ workflow test_metabuli_classify_se { file("${params.test_data_base}/data/genomics/sarscov2/nanopore/fastq/test_2.fastq.gz", checkIfExists: true), ] ] - - create_db() - db_archive = create_db.out - db_archive = METABULI_BUILD.out.db + + db_archive = file("${params.test_data_base}/data/delete_me/metabuli/metabuli_db.tar.gz", checkIfExists: true) UNTAR( [[:], db_archive]) METABULI_CLASSIFY_SE ( input , UNTAR.out.untar.map{it[1]}) } @@ -59,18 +36,8 @@ workflow test_metabuli_classify_pe { 
file("${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true), ]] ) - - create_db() - db_archive = create_db.out + db_archive = file("${params.test_data_base}/data/delete_me/metabuli/metabuli_db.tar.gz", checkIfExists: true) UNTAR([[:], db_archive]) - //transform pe reads to fasta prior to classification - - input.map{meta, reads -> [meta, reads[0]]} - .set{fw_reads} - - input.map{meta, reads -> [meta, reads[1]]} - .set{rv_reads} - METABULI_CLASSIFY_PE ( input , UNTAR.out.untar.map{it[1]}) } From 09815c8157b48bf92bdba23c4162830fb1e1bffe Mon Sep 17 00:00:00 2001 From: Tim Date: Wed, 19 Jul 2023 10:15:32 +0200 Subject: [PATCH 15/20] Update paths and min memory needed --- tests/modules/nf-core/metabuli/build/main.nf | 23 ++++--------------- .../nf-core/metabuli/build/nextflow.config | 3 ++- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/tests/modules/nf-core/metabuli/build/main.nf b/tests/modules/nf-core/metabuli/build/main.nf index b9f2617e29b..b665bafd1fc 100644 --- a/tests/modules/nf-core/metabuli/build/main.nf +++ b/tests/modules/nf-core/metabuli/build/main.nf @@ -4,21 +4,6 @@ nextflow.enable.dsl = 2 include { METABULI_BUILD } from '../../../../../modules/nf-core/metabuli/build/main.nf' -process BUILD_ACC2TAXID { - input: - path(genomes) - output: - path(acc2taxid) - - script: - """ - echo -e "accession\taccession.version\ttaxid\tgi" > acc2taxid - accessionv=\$(cat ${genomes} | head -n1 | - cut -d " " -f1 | sed 's/>//') - echo -e "\${accessionv%.*}\t\$accessionv\t2697049\t111" >> acc2taxid - """ -} - process CREATE_TAXONOMY_FOLDER{ input: path(dmpfiles) @@ -38,11 +23,13 @@ process CREATE_TAXONOMY_FOLDER{ workflow test_metabuli_build { genome = file("${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + dmp_files = [ - file("${params.test_data_base}/data/delete_me/metabuli/names.dmp"), - file("${params.test_data_base}/data/delete_me/metabuli/nodes.dmp") + 
file("${params.test_data_base}/data/delete_me/metabuli/taxonomy/names.dmp"), + file("${params.test_data_base}/data/delete_me/metabuli/taxonomy/nodes.dmp") ] - acc2taxid = BUILD_ACC2TAXID(genome) + acc2taxid = file("${params.test_data_base}/data/delete_me/metabuli/acc2taxid") + tax = CREATE_TAXONOMY_FOLDER(dmp_files) METABULI_BUILD ( genome, acc2taxid, tax ) } diff --git a/tests/modules/nf-core/metabuli/build/nextflow.config b/tests/modules/nf-core/metabuli/build/nextflow.config index 487e5dd5d41..5c70d71b82f 100644 --- a/tests/modules/nf-core/metabuli/build/nextflow.config +++ b/tests/modules/nf-core/metabuli/build/nextflow.config @@ -1,5 +1,6 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - + memory = "16GB" + } From 8f30f74f916f08c53a716789140bd574f946e774 Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Fri, 21 Jun 2024 08:14:31 +0000 Subject: [PATCH 16/20] init nf-test & add add module --- modules/nf-core/metabuli/add/environment.yml | 9 + modules/nf-core/metabuli/add/main.nf | 57 ++ modules/nf-core/metabuli/add/meta.yml | 57 ++ .../nf-core/metabuli/add/tests/main.nf.test | 74 +++ modules/nf-core/metabuli/add/tests/tags.yml | 2 + .../nf-core/metabuli/build/environment.yml | 6 + modules/nf-core/metabuli/build/main.nf | 12 +- modules/nf-core/metabuli/build/meta.yml | 11 +- .../nf-core/metabuli/build/tests/main.nf.test | 597 ++++++++++++++++++ .../metabuli/build/tests/main.nf.test.snap | 184 ++++++ modules/nf-core/metabuli/build/tests/tags.yml | 2 + .../nf-core/metabuli/classify/environment.yml | 6 + modules/nf-core/metabuli/classify/main.nf | 10 +- modules/nf-core/metabuli/classify/meta.yml | 3 +- 14 files changed, 1013 insertions(+), 17 deletions(-) create mode 100644 modules/nf-core/metabuli/add/environment.yml create mode 100644 modules/nf-core/metabuli/add/main.nf create mode 100644 modules/nf-core/metabuli/add/meta.yml create mode 100644 modules/nf-core/metabuli/add/tests/main.nf.test create 
mode 100644 modules/nf-core/metabuli/add/tests/tags.yml create mode 100644 modules/nf-core/metabuli/build/environment.yml create mode 100644 modules/nf-core/metabuli/build/tests/main.nf.test create mode 100644 modules/nf-core/metabuli/build/tests/main.nf.test.snap create mode 100644 modules/nf-core/metabuli/build/tests/tags.yml create mode 100644 modules/nf-core/metabuli/classify/environment.yml diff --git a/modules/nf-core/metabuli/add/environment.yml b/modules/nf-core/metabuli/add/environment.yml new file mode 100644 index 00000000000..44ccbb344ec --- /dev/null +++ b/modules/nf-core/metabuli/add/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "metabuli_add" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::metabuli=1.0.5" diff --git a/modules/nf-core/metabuli/add/main.nf b/modules/nf-core/metabuli/add/main.nf new file mode 100644 index 00000000000..41c657fe373 --- /dev/null +++ b/modules/nf-core/metabuli/add/main.nf @@ -0,0 +1,57 @@ +process METABULI_ADD { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/metabuli:1.0.5--pl5321h6a68c12_1': + 'biocontainers/metabuli:1.0.5--pl5321h6a68c12_1' }" + + input: + tuple val(meta), path(fasta) + path taxonomy_names, stageAs: 'taxonomy/names.dmp' + path taxonomy_nodes, stageAs: 'taxonomy/nodes.dmp' + path accession2taxid, stageAs: 'taxonomy/*' + + output: + tuple val(meta), path("${prefix}"), emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + metabuli: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: A stub section should mimic the execution of the original module as best as possible + // Have a look at the following examples: + // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 + // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + metabuli: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/metabuli/add/meta.yml b/modules/nf-core/metabuli/add/meta.yml new file mode 100644 index 00000000000..0537f1e027d --- /dev/null +++ b/modules/nf-core/metabuli/add/meta.yml @@ -0,0 +1,57 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "metabuli_add" +## TODO nf-core: Add a description of the module and list keywords +description: write your 
description here +keywords: + - sort + - example + - genomics +tools: + - "metabuli": + ## TODO nf-core: Add a description and other details for the software below + description: "Metabuli: specific and sensitive metagenomic classification via joint analysis of DNA and amino acid" + homepage: "None" + documentation: "None" + tool_dev_url: "None" + doi: "" + licence: ['GPL v3'] + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +## TODO nf-core: Add a description of all of the variables used as output +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + ## TODO nf-core: Delete / customise this example output + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +authors: + - "@Joon-Klaps" +maintainers: + - "@Joon-Klaps" diff --git a/modules/nf-core/metabuli/add/tests/main.nf.test b/modules/nf-core/metabuli/add/tests/main.nf.test new file mode 100644 index 00000000000..e2af01e861a --- /dev/null +++ b/modules/nf-core/metabuli/add/tests/main.nf.test @@ -0,0 +1,74 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core modules test metabuli/add +nextflow_process { + + name "Test Process METABULI_ADD" + script "../main.nf" + process "METABULI_ADD" + + tag "modules" + tag "modules_nfcore" + tag "metabuli" + tag "metabuli/add" + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format 
used + test("sarscov2 - bam") { + + // TODO nf-core: If you are created a test for a chained module + // (the module requires running more than one process to generate the required output) + // add the 'setup' method here. + // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules). + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. + // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. + ) + } + + } + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. 
+ ) + } + + } + +} diff --git a/modules/nf-core/metabuli/add/tests/tags.yml b/modules/nf-core/metabuli/add/tests/tags.yml new file mode 100644 index 00000000000..5fb30a9102f --- /dev/null +++ b/modules/nf-core/metabuli/add/tests/tags.yml @@ -0,0 +1,2 @@ +metabuli/add: + - "modules/nf-core/metabuli/add/**" diff --git a/modules/nf-core/metabuli/build/environment.yml b/modules/nf-core/metabuli/build/environment.yml new file mode 100644 index 00000000000..b7c37e88c07 --- /dev/null +++ b/modules/nf-core/metabuli/build/environment.yml @@ -0,0 +1,6 @@ +name: metabuli_build +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::metabuli=1.0.5 diff --git a/modules/nf-core/metabuli/build/main.nf b/modules/nf-core/metabuli/build/main.nf index b7870d1b14f..d85e0f4180a 100644 --- a/modules/nf-core/metabuli/build/main.nf +++ b/modules/nf-core/metabuli/build/main.nf @@ -5,17 +5,17 @@ process METABULI_BUILD { conda "bioconda::metabuli=1.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/metabuli:1.0.0--pl5321hf1761c0_0': - 'biocontainers/metabuli:1.0.0--pl5321hf1761c0_0' }" + 'https://depot.galaxyproject.org/singularity/metabuli:1.0.5--pl5321h6a68c12_1': + 'biocontainers/metabuli:1.0.5--pl5321h6a68c12_1' }" input: - path(genomes) + tuple val(meta),path(genomes) path(acc2taxid) path(db) output: path "metabuli_db.tar.gz", emit: db - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -23,7 +23,7 @@ process METABULI_BUILD { script: def args = task.ext.args ?: '' def args_lib = task.ext.args_lib ?: '' - def skip_lib = params.skip_lib ?: false + // def skip_lib = params.skip_lib ?: false """ ls $genomes > fastas.txt metabuli \\ @@ -41,7 +41,7 @@ process METABULI_BUILD { lib.txt \\ $acc2taxid \\ $args - + rm -r $db/library tar -czf metabuli_db.tar.gz $db rm -r $db diff --git a/modules/nf-core/metabuli/build/meta.yml b/modules/nf-core/metabuli/build/meta.yml index b4e0b5d04e5..14879d950f7 100644 --- a/modules/nf-core/metabuli/build/meta.yml +++ b/modules/nf-core/metabuli/build/meta.yml @@ -23,15 +23,15 @@ input: - acc2taxid: type: file description: | - Accession number to taxid file. - A four column tsv with the following header: + Accession number to taxid file. + A four column tsv with the following header: accession, accession.revision, taxid, gi. - db: type: directory description: | - Output folder name for the database. - If the taxonomy-path optional parameter is not set to a directory - containing names.dmp and nodes.dmp, this directory needs + Output folder name for the database. + If the taxonomy-path optional parameter is not set to a directory + containing names.dmp and nodes.dmp, this directory needs to contain a folder named taxonomy containing those two files. 
output: @@ -46,3 +46,4 @@ output: authors: - "@TheOafidian" + - "@Joon-Klaps" diff --git a/modules/nf-core/metabuli/build/tests/main.nf.test b/modules/nf-core/metabuli/build/tests/main.nf.test new file mode 100644 index 00000000000..e72cdba89b4 --- /dev/null +++ b/modules/nf-core/metabuli/build/tests/main.nf.test @@ -0,0 +1,597 @@ +// nf-core modules test metabuli/build +nextflow_process { + + name "Test Process METABULI_BUILD" + script "../main.nf" + process "METABULI_BUILD" + + tag "modules" + tag "modules_nfcore" + tag "metabuli" + tag "metabuli/build" + + + test("sarscov2 - paired-end illumina reads") { + + setup { + + run("SEQTK_MERGEPE") { + script "../../../seqtk/mergepe/main.nf" + config "./modules.config" + tag "seqtk/mergepe" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + """ + } + } + + run("MERYL_COUNT") { + script "../../../meryl/count/main.nf" + config "./modules.config" + tag "meryl/count" + process { + """ + input[0] = SEQTK_MERGEPE.out.reads + input[1] = Channel.value(6) + """ + } + } + + + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = MERYL_COUNT.out.meryl_db + input[2] = [] + input[3] = [] + input[4] = 10 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.get(0), process.out.get(2)).match() }, + // The following asserts that the instable output stdout_log generated by the module is complete + { assert path(process.out.get(1).get(0)).readLines().last().contains("Bye!") } + ) + } + + } + + test("sarscov2 - paired-end illumina reads - stub") { + + options 
"-stub" + + setup { + + run("SEQTK_MERGEPE") { + script "../../../seqtk/mergepe/main.nf" + config "./modules.config" + tag "seqtk/mergepe" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + """ + } + } + + run("MERYL_COUNT") { + script "../../../meryl/count/main.nf" + config "./modules.config" + tag "meryl/count" + process { + """ + input[0] = SEQTK_MERGEPE.out.reads + input[1] = Channel.value(6) + """ + } + } + + + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = MERYL_COUNT.out.meryl_db + input[2] = [] + input[3] = [] + input[4] = 10 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + + test("sarscov2 - paired-end illumina reads plus optional inputs lookup_table and seqmers") { + + setup { + + run("SEQTK_MERGEPE") { + script "../../../seqtk/mergepe/main.nf" + config "./modules.config" + tag "seqtk/mergepe" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + """ + } + } + + run("MERYL_COUNT", alias: "MERYL_COUNT_READS") { + script "../../../meryl/count/main.nf" + config "./modules.config" + tag "meryl/count" + process { + """ + input[0] = SEQTK_MERGEPE.out.reads + input[1] = Channel.value(6) + """ + } + } + + run("MERYL_COUNT", alias: "MERYL_COUNT_ASM") { + script 
"../../../meryl/count/main.nf" + config "./modules.config" + tag "meryl/count" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = Channel.value(6) + """ + } + } + + run("MERYL_HISTOGRAM") { + script "../../../meryl/histogram/main.nf" + config "./modules.config" + tag "meryl/histogram" + process { + """ + input[0] = MERYL_COUNT_READS.out.meryl_db + input[1] = Channel.value(6) + """ + } + } + + run("GENOMESCOPE2") { + script "../../../genomescope2/main.nf" + config "./modules.config" + tag "genomescope2" + process { + """ + input[0] = MERYL_HISTOGRAM.out.hist + """ + } + } + + + + + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = MERYL_COUNT_READS.out.meryl_db + input[2] = GENOMESCOPE2.out.lookup_table.map{ meta, lkp -> lkp} + input[3] = MERYL_COUNT_ASM.out.meryl_db.map{ meta, meryl_db -> meryl_db } + input[4] = 10 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.get(0), process.out.get(2)).match() }, + // The following asserts that the instable output stdout_log generated by the module is complete + { assert path(process.out.get(1).get(0)).readLines().last().contains("Bye!") } + ) + } + + } + + test("sarscov2 - paired-end illumina reads plus inputs lookup_table and seqmers - stub") { + + options "-stub" + + setup { + + run("SEQTK_MERGEPE") { + script "../../../seqtk/mergepe/main.nf" + config "./modules.config" + tag "seqtk/mergepe" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'/genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + """ + } + } + + run("MERYL_COUNT", alias: "MERYL_COUNT_READS") { + script "../../../meryl/count/main.nf" + config "./modules.config" + tag "meryl/count" + process { + """ + input[0] = SEQTK_MERGEPE.out.reads + input[1] = Channel.value(6) + """ + } + } + + run("MERYL_COUNT", alias: "MERYL_COUNT_ASM") { + script "../../../meryl/count/main.nf" + config "./modules.config" + tag "meryl/count" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = Channel.value(6) + """ + } + } + + run("MERYL_HISTOGRAM") { + script "../../../meryl/histogram/main.nf" + config "./modules.config" + tag "meryl/histogram" + process { + """ + input[0] = MERYL_COUNT_READS.out.meryl_db + input[1] = Channel.value(6) + """ + } + } + + run("GENOMESCOPE2") { + script "../../../genomescope2/main.nf" + config "./modules.config" + tag "genomescope2" + process { + """ + input[0] = MERYL_HISTOGRAM.out.hist + """ + } + } + + + + + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = MERYL_COUNT_READS.out.meryl_db + input[2] = GENOMESCOPE2.out.lookup_table.map{ meta, lkp -> lkp } + input[3] = MERYL_COUNT_ASM.out.meryl_db.map{ meta, meryl_db -> meryl_db } + input[4] = 10 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("Multiple samples - sarscov2 + bacteroides_fragilis - paired-end illumina reads") { + + setup { + + run("SEQTK_MERGEPE") { + script "../../../seqtk/mergepe/main.nf" + config "./modules.config" + tag "seqtk/mergepe" + process { + """ + input[0] = Channel.from([ + [ id:'test_sars', single_end:false ], // meta map + [ + 
file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ], [ + [ id:'test_fragilis', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz', checkIfExists: true) + ] + ]) + """ + } + } + + run("MERYL_COUNT", alias: "MERYL_COUNT_READS") { + script "../../../meryl/count/main.nf" + config "./modules.config" + tag "meryl/count" + process { + """ + input[0] = SEQTK_MERGEPE.out.reads + input[1] = Channel.value(6) + """ + } + } + + run("MERYL_COUNT", alias: "MERYL_COUNT_ASM") { + script "../../../meryl/count/main.nf" + config "./modules.config" + tag "meryl/count" + process { + """ + input[0] = Channel.from([ + [ id:'test_sars', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ],[ + [ id:'test_fragilis', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ]) + input[1] = Channel.value(6) + """ + } + } + + run("MERYL_HISTOGRAM") { + script "../../../meryl/histogram/main.nf" + config "./modules.config" + tag "meryl/histogram" + process { + """ + input[0] = MERYL_COUNT_READS.out.meryl_db + input[1] = Channel.value(6) + """ + } + } + + run("GENOMESCOPE2") { + script "../../../genomescope2/main.nf" + config "./modules.config" + tag "genomescope2" + process { + """ + input[0] = MERYL_HISTOGRAM.out.hist + """ + } + } + + + + + } + + when { + process { + """ + Channel.from([ + [ id:'test_sars', single_end:false ], // meta map + 
file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ],[ + [ id:'test_fragilis', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ]) + .set{ input_zero_ch } + + input_zero_ch.join( MERYL_COUNT_READS.out.meryl_db ) + .join( MERYL_COUNT_ASM.out.meryl_db ) + .join( GENOMESCOPE2.out.lookup_table ) + .multiMap{ meta, asm, meryl_db, asm_meryl_db, lookup_table -> + asm_ch: [meta, asm] + meryl_ch: [meta, meryl_db] + asm_meryl_ch: [asm_meryl_db] + lk_ch: [lookup_table] + }.set{ multimap_inputs_ch } + + input[0] = multimap_inputs_ch.asm_ch + input[1] = multimap_inputs_ch.meryl_ch + input[2] = multimap_inputs_ch.lk_ch + input[3] = multimap_inputs_ch.asm_meryl_ch + input[4] = 10 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.get(0), process.out.get(2)).match() }, + // The following asserts that the output stdout_log generated by the module is complete + { assert path(process.out.get(1).get(0)).readLines().last().contains("Bye!") } + ) + } + + } + + test("Multiple samples - sarscov2 + bacteroides_fragilis - paired-end illumina reads - stub") { + + options "-stub" + + setup { + + run("SEQTK_MERGEPE") { + script "../../../seqtk/mergepe/main.nf" + config "./modules.config" + tag "seqtk/mergepe" + process { + """ + input[0] = Channel.from([ + [ id:'test_sars', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ], [ + [ id:'test_fragilis', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz', checkIfExists: true), + 
file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz', checkIfExists: true) + ] + ]) + """ + } + } + + run("MERYL_COUNT", alias: "MERYL_COUNT_READS") { + script "../../../meryl/count/main.nf" + config "./modules.config" + tag "meryl/count" + process { + """ + input[0] = SEQTK_MERGEPE.out.reads + input[1] = Channel.value(6) + """ + } + } + + run("MERYL_COUNT", alias: "MERYL_COUNT_ASM") { + script "../../../meryl/count/main.nf" + config "./modules.config" + tag "meryl/count" + process { + """ + input[0] = Channel.from([ + [ id:'test_sars', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ],[ + [ id:'test_fragilis', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ]) + input[1] = Channel.value(6) + """ + } + } + + run("MERYL_HISTOGRAM") { + script "../../../meryl/histogram/main.nf" + config "./modules.config" + tag "meryl/histogram" + process { + """ + input[0] = MERYL_COUNT_READS.out.meryl_db + input[1] = Channel.value(6) + """ + } + } + + run("GENOMESCOPE2") { + script "../../../genomescope2/main.nf" + config "./modules.config" + tag "genomescope2" + process { + """ + input[0] = MERYL_HISTOGRAM.out.hist + """ + } + } + + + + + } + + when { + process { + """ + Channel.from([ + [ id:'test_sars', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ],[ + [ id:'test_fragilis', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ]) + .set{ input_zero_ch } + + + input_zero_ch.join( MERYL_COUNT_READS.out.meryl_db ) + .join( MERYL_COUNT_ASM.out.meryl_db ) + .join( GENOMESCOPE2.out.lookup_table ) + .multiMap{ 
meta, asm, meryl_db, asm_meryl_db, lookup_table -> + asm_ch: [meta, asm] + meryl_ch: [meta, meryl_db] + asm_meryl_ch: [asm_meryl_db] + lk_ch: [lookup_table] + }.set{ multimap_inputs_ch } + + input[0] = multimap_inputs_ch.asm_ch + input[1] = multimap_inputs_ch.meryl_ch + input[2] = multimap_inputs_ch.lk_ch + input[3] = MERYL_COUNT_ASM.out.meryl_db.map{ meta, meryl_db -> meryl_db } + input[4] = 10 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.get(0), process.out.get(2)).match() } + ) + } + + } +} diff --git a/modules/nf-core/metabuli/build/tests/main.nf.test.snap b/modules/nf-core/metabuli/build/tests/main.nf.test.snap new file mode 100644 index 00000000000..2d1f4e9da8f --- /dev/null +++ b/modules/nf-core/metabuli/build/tests/main.nf.test.snap @@ -0,0 +1,184 @@ +{ + "Multiple samples - sarscov2 + bacteroides_fragilis - paired-end illumina reads": { + "content": [ + [ + [ + { + "id": "test_fragilis", + "single_end": false + }, + "test_fragilis.hist:md5,3380e6f8b82739edc44b45ac6b50525e" + ], + [ + { + "id": "test_sars", + "single_end": false + }, + "test_sars.hist:md5,564c5dc22b9a10041b248405bb2b15d0" + ] + ], + [ + "versions.yml:md5,078b89694148fc55475e89987173ac4e", + "versions.yml:md5,078b89694148fc55475e89987173ac4e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-02T11:05:51.416187" + }, + "sarscov2 - paired-end illumina reads - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "test.hist.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "2": [ + "versions.yml:md5,078b89694148fc55475e89987173ac4e" + ], + "hist": [ + [ + { + "id": "test", + "single_end": false + }, + "test.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_stderr": [ + "test.hist.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + 
"versions.yml:md5,078b89694148fc55475e89987173ac4e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-02T11:04:28.744584" + }, + "sarscov2 - paired-end illumina reads plus inputs lookup_table and seqmers - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "test.hist.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "2": [ + "versions.yml:md5,078b89694148fc55475e89987173ac4e" + ], + "hist": [ + [ + { + "id": "test", + "single_end": false + }, + "test.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_stderr": [ + "test.hist.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,078b89694148fc55475e89987173ac4e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-02T11:04:51.831185" + }, + "sarscov2 - paired-end illumina reads plus optional inputs lookup_table and seqmers": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.hist:md5,564c5dc22b9a10041b248405bb2b15d0" + ] + ], + [ + "versions.yml:md5,078b89694148fc55475e89987173ac4e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-02T11:04:41.739829" + }, + "Multiple samples - sarscov2 + bacteroides_fragilis - paired-end illumina reads - stub": { + "content": [ + [ + [ + { + "id": "test_fragilis", + "single_end": false + }, + "test_fragilis.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test_sars", + "single_end": false + }, + "test_sars.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + "versions.yml:md5,078b89694148fc55475e89987173ac4e", + "versions.yml:md5,078b89694148fc55475e89987173ac4e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-02T11:06:03.566853" + }, + "sarscov2 - paired-end illumina reads": { + "content": 
[ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.hist:md5,e722be64a7fffeec654a10bea92ec619" + ] + ], + [ + "versions.yml:md5,078b89694148fc55475e89987173ac4e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-02T11:04:20.733553" + } +} \ No newline at end of file diff --git a/modules/nf-core/metabuli/build/tests/tags.yml b/modules/nf-core/metabuli/build/tests/tags.yml new file mode 100644 index 00000000000..b931e457ad4 --- /dev/null +++ b/modules/nf-core/metabuli/build/tests/tags.yml @@ -0,0 +1,2 @@ +metabuli/build: + - "modules/nf-core/metabuli/build/**" diff --git a/modules/nf-core/metabuli/classify/environment.yml b/modules/nf-core/metabuli/classify/environment.yml new file mode 100644 index 00000000000..78c213cdfea --- /dev/null +++ b/modules/nf-core/metabuli/classify/environment.yml @@ -0,0 +1,6 @@ +name: metabuli_classify +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::metabuli=1.0.5 diff --git a/modules/nf-core/metabuli/classify/main.nf b/modules/nf-core/metabuli/classify/main.nf index ec4715810e0..02096f785c9 100644 --- a/modules/nf-core/metabuli/classify/main.nf +++ b/modules/nf-core/metabuli/classify/main.nf @@ -4,8 +4,8 @@ process METABULI_CLASSIFY { conda "bioconda::metabuli=1.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/metabuli:1.0.0--pl5321hf1761c0_0': - 'biocontainers/metabuli:1.0.0--pl5321hf1761c0_0' }" + 'https://depot.galaxyproject.org/singularity/metabuli:1.0.5--pl5321h6a68c12_1': + 'biocontainers/metabuli:1.0.5--pl5321h6a68c12_1' }" input: tuple val(meta), path(fastas) @@ -13,8 +13,8 @@ process METABULI_CLASSIFY { output: tuple val(meta), path("*/*_classifications.tsv"), emit: classification - tuple val(meta), path("*/*_report.tsv"), emit: report - path "versions.yml" , emit: versions + tuple val(meta), path("*/*_report.tsv") , emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -29,7 +29,7 @@ process METABULI_CLASSIFY { } else if (is_compressed) { input = "${fastas[0].baseName} ${fastas[1].baseName}" } - + """ if [ "$is_compressed" == "true" ]; then gzip -d *.gz diff --git a/modules/nf-core/metabuli/classify/meta.yml b/modules/nf-core/metabuli/classify/meta.yml index 0d8b2004023..fd910db5c21 100644 --- a/modules/nf-core/metabuli/classify/meta.yml +++ b/modules/nf-core/metabuli/classify/meta.yml @@ -36,7 +36,7 @@ output: type: map description: | Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + e.g. 
[ id:'test', single_end:false ] - versions: type: file description: File containing software versions @@ -52,3 +52,4 @@ output: authors: - "@TheOafidian" + - "@Joon-Klaps" From f2f8ea93e0d4c24c35e216349151e2619fff6849 Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Fri, 21 Jun 2024 09:00:59 +0000 Subject: [PATCH 17/20] furthere splitting up add & build --- modules/nf-core/metabuli/add/main.nf | 31 ++++++------ modules/nf-core/metabuli/add/meta.yml | 69 ++++++++++++-------------- modules/nf-core/metabuli/build/main.nf | 15 ------ 3 files changed, 47 insertions(+), 68 deletions(-) diff --git a/modules/nf-core/metabuli/add/main.nf b/modules/nf-core/metabuli/add/main.nf index 41c657fe373..4e463b7a62c 100644 --- a/modules/nf-core/metabuli/add/main.nf +++ b/modules/nf-core/metabuli/add/main.nf @@ -9,9 +9,8 @@ process METABULI_ADD { input: tuple val(meta), path(fasta) - path taxonomy_names, stageAs: 'taxonomy/names.dmp' - path taxonomy_nodes, stageAs: 'taxonomy/nodes.dmp' - path accession2taxid, stageAs: 'taxonomy/*' + path accession2taxid + path db output: tuple val(meta), path("${prefix}"), emit: db @@ -25,33 +24,31 @@ process METABULI_ADD { def prefix = task.ext.prefix ?: "${meta.id}" """ - samtools \\ - sort \\ - $args \\ - -@ $task.cpus \\ - -o ${prefix}.bam \\ - -T $prefix \\ - $bam + mkdir -p $prefix + mv $db/* $prefix + + ls $fasta > fasta.txt + metabuli add-to-library \\ + fasta.txt \\ + $accession2taxid \\ + $prefix \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": - metabuli: \$(samtools --version |& sed '1!d ; s/samtools //') + metabuli: \$(metabuli version) END_VERSIONS """ stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: 
https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 """ - touch ${prefix}.bam + mkdir -p $prefix cat <<-END_VERSIONS > versions.yml "${task.process}": - metabuli: \$(samtools --version |& sed '1!d ; s/samtools //') + metabuli: \$(metabuli version) END_VERSIONS """ } diff --git a/modules/nf-core/metabuli/add/meta.yml b/modules/nf-core/metabuli/add/meta.yml index 0537f1e027d..cfcc06b4836 100644 --- a/modules/nf-core/metabuli/add/meta.yml +++ b/modules/nf-core/metabuli/add/meta.yml @@ -1,57 +1,54 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "metabuli_add" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: Add fasta sequences to a database needed to use the metabuli classification tool keywords: - - sort - - example - - genomics + - metabuli + - metagenomics + - profiling + - taxonomy + - database tools: - "metabuli": - ## TODO nf-core: Add a description and other details for the software below description: "Metabuli: specific and sensitive metagenomic classification via joint analysis of DNA and amino acid" - homepage: "None" - documentation: "None" - tool_dev_url: "None" - doi: "" - licence: ['GPL v3'] + homepage: "https://github.com/steineggerlab/Metabuli" + documentation: "https://github.com/steineggerlab/Metabuli#readme" + tool_dev_url: "https://github.com/steineggerlab/Metabuli" + doi: "10.1101/2023.05.31.543018" + licence: "['GPL v3']" -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - meta: type: map description: | Groovy Map containing sample information - e.g. 
`[ id:'sample1', single_end:false ]` - - ## TODO nf-core: Delete / customise this example input - - bam: + e.g. [ id:'test', single_end:false ] + - fasta: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: Genome files that serve as entries in the database + pattern: "*.{fa,fasta,fna}" + - accession2taxid: + type: file + description: | + Accession number to taxid file. + A four column tsv with the following header: + accession, accession.revision, taxid, gi. + - db: + type: directory + description: | + Output folder name for the database. + If the taxonomy-path optional parameter is not set to a directory + containing names.dmp and nodes.dmp, this directory needs + to contain a folder named taxonomy containing those two files. -## TODO nf-core: Add a description of all of the variables used as output output: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - versions: type: file description: File containing software versions pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - - bam: - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + - db: + type: directory + description: Compressed metabuli database + pattern: "${prefix}/" authors: - - "@Joon-Klaps" -maintainers: + - "@TheOafidian" - "@Joon-Klaps" diff --git a/modules/nf-core/metabuli/build/main.nf b/modules/nf-core/metabuli/build/main.nf index d85e0f4180a..4645babe3fe 100644 --- a/modules/nf-core/metabuli/build/main.nf +++ b/modules/nf-core/metabuli/build/main.nf @@ -9,8 +9,6 @@ process METABULI_BUILD { 'biocontainers/metabuli:1.0.5--pl5321h6a68c12_1' }" input: - tuple val(meta),path(genomes) - path(acc2taxid) path(db) output: @@ -22,16 +20,7 @@ process METABULI_BUILD { script: def args = task.ext.args ?: '' - def args_lib = task.ext.args_lib ?: '' - // def skip_lib = params.skip_lib ?: false """ - ls 
$genomes > fastas.txt - metabuli \\ - add-to-library \\ - fastas.txt \\ - $acc2taxid \\ - $db - $args_lib ls $db/library/* > lib.txt metabuli \\ @@ -42,10 +31,6 @@ process METABULI_BUILD { $acc2taxid \\ $args - rm -r $db/library - tar -czf metabuli_db.tar.gz $db - rm -r $db - cat <<-END_VERSIONS > versions.yml "${task.process}": metabuli: \$(metabuli | grep Version | sed 's/^metabuli Version: //';)) From 1b9d0eab74873192ed519fd0bbea168fa39905e7 Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Fri, 21 Jun 2024 13:33:35 +0000 Subject: [PATCH 18/20] removing old pytest files --- tests/config/pytest_modules.yml | 6 --- tests/modules/nf-core/metabuli/build/main.nf | 35 --------------- .../nf-core/metabuli/build/nextflow.config | 6 --- tests/modules/nf-core/metabuli/build/test.yml | 17 -------- .../modules/nf-core/metabuli/classify/main.nf | 43 ------------------- .../nf-core/metabuli/classify/nextflow.config | 17 -------- .../nf-core/metabuli/classify/test.yml | 27 ------------ 7 files changed, 151 deletions(-) delete mode 100644 tests/modules/nf-core/metabuli/build/main.nf delete mode 100644 tests/modules/nf-core/metabuli/build/nextflow.config delete mode 100644 tests/modules/nf-core/metabuli/build/test.yml delete mode 100644 tests/modules/nf-core/metabuli/classify/main.nf delete mode 100644 tests/modules/nf-core/metabuli/classify/nextflow.config delete mode 100644 tests/modules/nf-core/metabuli/classify/test.yml diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 08255a01594..e94f139e46a 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -990,12 +990,6 @@ metabat2/metabat2: metaeuk/easypredict: - modules/nf-core/metaeuk/easypredict/** - tests/modules/nf-core/metaeuk/easypredict/** -metabuli/build: - - modules/nf-core/metabuli/build/** - - tests/modules/nf-core/metabuli/build/** -metabuli/classify: - - modules/nf-core/metabuli/classify/** - - tests/modules/nf-core/metabuli/classify/** metaphlan/makedb: 
- modules/nf-core/metaphlan/makedb/** - tests/modules/nf-core/metaphlan/makedb/** diff --git a/tests/modules/nf-core/metabuli/build/main.nf b/tests/modules/nf-core/metabuli/build/main.nf deleted file mode 100644 index b665bafd1fc..00000000000 --- a/tests/modules/nf-core/metabuli/build/main.nf +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -include { METABULI_BUILD } from '../../../../../modules/nf-core/metabuli/build/main.nf' - -process CREATE_TAXONOMY_FOLDER{ - input: - path(dmpfiles) - - output: - path(db) - - script: - """ - mkdir -p db/taxonomy - mv *.dmp db/taxonomy - touch db/taxonomy/merged.dmp - """ - -} - -workflow test_metabuli_build { - - genome = file("${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta", checkIfExists: true) - - dmp_files = [ - file("${params.test_data_base}/data/delete_me/metabuli/taxonomy/names.dmp"), - file("${params.test_data_base}/data/delete_me/metabuli/taxonomy/nodes.dmp") - ] - acc2taxid = file("${params.test_data_base}/data/delete_me/metabuli/acc2taxid") - - tax = CREATE_TAXONOMY_FOLDER(dmp_files) - METABULI_BUILD ( genome, acc2taxid, tax ) -} diff --git a/tests/modules/nf-core/metabuli/build/nextflow.config b/tests/modules/nf-core/metabuli/build/nextflow.config deleted file mode 100644 index 5c70d71b82f..00000000000 --- a/tests/modules/nf-core/metabuli/build/nextflow.config +++ /dev/null @@ -1,6 +0,0 @@ -process { - - publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - memory = "16GB" - -} diff --git a/tests/modules/nf-core/metabuli/build/test.yml b/tests/modules/nf-core/metabuli/build/test.yml deleted file mode 100644 index aadaa1b9279..00000000000 --- a/tests/modules/nf-core/metabuli/build/test.yml +++ /dev/null @@ -1,17 +0,0 @@ -- name: metabuli build test_metabuli_build - command: nextflow run ./tests/modules/nf-core/metabuli/build -entry test_metabuli_build -c ./tests/config/nextflow.config -c 
./tests/modules/nf-core/metabuli/build/nextflow.config -stub-run - tags: - - metabuli/build - - metabuli - files: - - path: output/build/acc2taxid - md5sum: 4ae5ac3b865cb7bf301c3cc72153a7a8 - - path: output/create/db/taxonomy/merged.dmp - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: output/create/db/taxonomy/names.dmp - md5sum: 1cacf8dba5defe61c7b31dc4342801a0 - - path: output/create/db/taxonomy/nodes.dmp - md5sum: a41041a713e9fb2be5eac4723a421385 - - path: output/metabuli/metabuli_db.tar.gz - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: output/metabuli/versions.yml diff --git a/tests/modules/nf-core/metabuli/classify/main.nf b/tests/modules/nf-core/metabuli/classify/main.nf deleted file mode 100644 index a2366ad0c42..00000000000 --- a/tests/modules/nf-core/metabuli/classify/main.nf +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -include { UNTAR } from '../../../../../modules/nf-core/untar/main.nf' -include { METABULI_CLASSIFY as METABULI_CLASSIFY_PE } from '../../../../../modules/nf-core/metabuli/classify/main.nf' -include { METABULI_CLASSIFY as METABULI_CLASSIFY_SE } from '../../../../../modules/nf-core/metabuli/classify/main.nf' - -// test with single end data -workflow test_metabuli_classify_se { - - input = [ - [ id:'test_se', single_end:true ], // meta map - [ - file("${params.test_data_base}/data/genomics/sarscov2/nanopore/fastq/test_2.fastq.gz", checkIfExists: true), - ] - ] - - db_archive = file("${params.test_data_base}/data/delete_me/metabuli/metabuli_db.tar.gz", checkIfExists: true) - UNTAR( [[:], db_archive]) - METABULI_CLASSIFY_SE ( input , UNTAR.out.untar.map{it[1]}) -} - -// test with paired end data -workflow test_metabuli_classify_pe { - - input = Channel.from( - [[ id:'test_pe', single_end:false ], // meta map - [ - file("${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), - 
file("${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true), - ]], - [[ id:'test_pe2', single_end:false ], // meta map - [ - file("${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), - file("${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true), - ]] - ) - - db_archive = file("${params.test_data_base}/data/delete_me/metabuli/metabuli_db.tar.gz", checkIfExists: true) - UNTAR([[:], db_archive]) - METABULI_CLASSIFY_PE ( input , UNTAR.out.untar.map{it[1]}) -} diff --git a/tests/modules/nf-core/metabuli/classify/nextflow.config b/tests/modules/nf-core/metabuli/classify/nextflow.config deleted file mode 100644 index 5b77ae739ad..00000000000 --- a/tests/modules/nf-core/metabuli/classify/nextflow.config +++ /dev/null @@ -1,17 +0,0 @@ -process { - - publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - - withLabel:process_medium { - memory = 6.5.GB - } - withName: SEQTK_SEQ { - ext.args = "-a" - } - - withName: SEQTK_SEQ_PE_RV { - ext.args = "-a" - ext.prefix = "reverse" - } -} - diff --git a/tests/modules/nf-core/metabuli/classify/test.yml b/tests/modules/nf-core/metabuli/classify/test.yml deleted file mode 100644 index a7bd8f85d64..00000000000 --- a/tests/modules/nf-core/metabuli/classify/test.yml +++ /dev/null @@ -1,27 +0,0 @@ -- name: metabuli classify test_metabuli_classify_se - command: nextflow run ./tests/modules/nf-core/metabuli/classify -entry test_metabuli_classify_se -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/metabuli/classify/nextflow.config - tags: - - metabuli/classify - - metabuli - files: - - path: output/metabuli/test_se_out/test_se_classifications.tsv - md5sum: 3870c45887908e8e4b3c60c23ae3e008 - - path: output/metabuli/test_se_out/test_se_report.tsv - md5sum: 686930217102cd8d249906387de3e6b5 - - path: output/metabuli/versions.yml - -- name: 
metabuli classify test_metabuli_classify_pe - command: nextflow run ./tests/modules/nf-core/metabuli/classify -entry test_metabuli_classify_pe -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/metabuli/classify/nextflow.config - tags: - - metabuli/classify - - metabuli - files: - - path: output/metabuli/test_pe2_out/test_pe2_classifications.tsv - md5sum: d3b961b3a1e3f1181d182583ee246523 - - path: output/metabuli/test_pe2_out/test_pe2_report.tsv - md5sum: 0cc7f995f15304368574b4217cdd13b9 - - path: output/metabuli/test_pe_out/test_pe_classifications.tsv - md5sum: d3b961b3a1e3f1181d182583ee246523 - - path: output/metabuli/test_pe_out/test_pe_report.tsv - md5sum: 0cc7f995f15304368574b4217cdd13b9 - - path: output/metabuli/versions.yml From 92aaf7f827d1e039d3ebe0b0a8dc3a3f7b9173b2 Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Fri, 21 Jun 2024 13:34:16 +0000 Subject: [PATCH 19/20] testing setups metabuli - needs fixing --- modules/nf-core/metabuli/add/environment.yml | 3 - modules/nf-core/metabuli/add/main.nf | 22 +- modules/nf-core/metabuli/add/meta.yml | 33 +- .../nf-core/metabuli/add/tests/main.nf.test | 77 ++- .../metabuli/add/tests/main.nf.test.snap | 94 +++ .../nf-core/metabuli/build/environment.yml | 4 +- modules/nf-core/metabuli/build/main.nf | 38 +- modules/nf-core/metabuli/build/meta.yml | 39 +- .../nf-core/metabuli/build/tests/main.nf.test | 556 +----------------- .../metabuli/build/tests/main.nf.test.snap | 249 ++++---- .../metabuli/build/tests/nextflow.config | 6 + .../nf-core/metabuli/classify/environment.yml | 2 +- modules/nf-core/metabuli/classify/main.nf | 42 +- .../metabuli/classify/tests/main.nf.test | 81 +++ .../metabuli/classify/tests/main.nf.test.snap | 82 +++ .../classify/tests/nextflow-se.config | 5 + .../nf-core/metabuli/classify/tests/tags.yml | 3 + 17 files changed, 551 insertions(+), 785 deletions(-) create mode 100644 modules/nf-core/metabuli/add/tests/main.nf.test.snap create mode 100644 
modules/nf-core/metabuli/build/tests/nextflow.config create mode 100644 modules/nf-core/metabuli/classify/tests/main.nf.test create mode 100644 modules/nf-core/metabuli/classify/tests/main.nf.test.snap create mode 100644 modules/nf-core/metabuli/classify/tests/nextflow-se.config create mode 100644 modules/nf-core/metabuli/classify/tests/tags.yml diff --git a/modules/nf-core/metabuli/add/environment.yml b/modules/nf-core/metabuli/add/environment.yml index 44ccbb344ec..0e5eef64f23 100644 --- a/modules/nf-core/metabuli/add/environment.yml +++ b/modules/nf-core/metabuli/add/environment.yml @@ -1,9 +1,6 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json name: "metabuli_add" channels: - conda-forge - bioconda - - defaults dependencies: - "bioconda::metabuli=1.0.5" diff --git a/modules/nf-core/metabuli/add/main.nf b/modules/nf-core/metabuli/add/main.nf index 4e463b7a62c..022f201308a 100644 --- a/modules/nf-core/metabuli/add/main.nf +++ b/modules/nf-core/metabuli/add/main.nf @@ -9,28 +9,32 @@ process METABULI_ADD { input: tuple val(meta), path(fasta) - path accession2taxid - path db + path taxonomy_names, stageAs: 'taxonomy/names.dmp' + path taxonomy_nodes, stageAs: 'taxonomy/nodes.dmp' + path taxonomy_merged, stageAs: 'taxonomy/merged.dmp' + path accession2taxid, stageAs: 'taxonomy/*' output: - tuple val(meta), path("${prefix}"), emit: db - path "versions.yml" , emit: versions + tuple val(meta), path("$prefix"), emit: db + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" + make_merged = taxonomy_merged ? 
"" : "touch ${prefix}/taxonomy/merged.dmp" """ mkdir -p $prefix - mv $db/* $prefix + mv "taxonomy" $prefix + $make_merged - ls $fasta > fasta.txt + realpath $fasta > fasta.txt metabuli add-to-library \\ fasta.txt \\ - $accession2taxid \\ + $prefix/$accession2taxid \\ $prefix \\ $args @@ -42,7 +46,7 @@ process METABULI_ADD { stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ mkdir -p $prefix diff --git a/modules/nf-core/metabuli/add/meta.yml b/modules/nf-core/metabuli/add/meta.yml index cfcc06b4836..c8a4e70f0a1 100644 --- a/modules/nf-core/metabuli/add/meta.yml +++ b/modules/nf-core/metabuli/add/meta.yml @@ -20,24 +20,27 @@ input: type: map description: | Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + e.g. `[ id:'sample1', single_end:false ]` - fasta: type: file - description: Genome files that serve as entries in the database - pattern: "*.{fa,fasta,fna}" + description: fasta file that will be added to the database + pattern: "*.{fa,fasta,fna,ffn}" + - taxonomy_names: + type: file + description: used for associating sequences with taxonomy IDs + pattern: "*.dmp" + - taxonomy_nodes: + type: file + description: tree nodes using NCBI taxonomy nomenclature + pattern: "*.dmp" + - taxonomy_merged: + type: file + description: merged.dmp maps secondary taxids onto primary taxids for taxa that have been synonymized in the database + pattern: "*.dmp" - accession2taxid: type: file - description: | - Accession number to taxid file. - A four column tsv with the following header: - accession, accession.revision, taxid, gi. - - db: - type: directory - description: | - Output folder name for the database. - If the taxonomy-path optional parameter is not set to a directory - containing names.dmp and nodes.dmp, this directory needs - to contain a folder named taxonomy containing those two files. 
+ description: associates sequence accession IDs to taxonomy IDs + pattern: "*.accession2taxid" output: - versions: @@ -46,7 +49,7 @@ output: pattern: "versions.yml" - db: type: directory - description: Compressed metabuli database + description: Metabuli database pattern: "${prefix}/" authors: diff --git a/modules/nf-core/metabuli/add/tests/main.nf.test b/modules/nf-core/metabuli/add/tests/main.nf.test index e2af01e861a..8909691f6dd 100644 --- a/modules/nf-core/metabuli/add/tests/main.nf.test +++ b/modules/nf-core/metabuli/add/tests/main.nf.test @@ -1,5 +1,3 @@ -// TODO nf-core: Once you have added the required tests, please run the following command to build this file: -// nf-core modules test metabuli/add nextflow_process { name "Test Process METABULI_ADD" @@ -11,23 +9,22 @@ nextflow_process { tag "metabuli" tag "metabuli/add" - // TODO nf-core: Change the test name preferably indicating the test-data and file-format used - test("sarscov2 - bam") { - - // TODO nf-core: If you are created a test for a chained module - // (the module requires running more than one process to generate the required output) - // add the 'setup' method here. - // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules). + test("sarscov2 protein_db") { when { process { """ - // TODO nf-core: define inputs of the process here. 
Example: - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) - ] + [ id:'test' ], + [ + file(params.modules_testdata_base_path +'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path +'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true), + ] + ] + input[1] = file(params.modules_testdata_base_path +'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path +'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true) + input[3] = [] // needs a merged.dmp + input[4] = file(params.modules_testdata_base_path + 'delete_me/metabuli/acc2taxid', checkIfExists: true) """ } } @@ -35,40 +32,38 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } - //TODO nf-core: Add all required assertions to verify the test output. - // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. + { assert snapshot (process.out).match()}, ) } - } - // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. - test("sarscov2 - bam - stub") { + test("sarscov2 protein_db stub") { - options "-stub" + options "-stub" - when { - process { - """ - // TODO nf-core: define inputs of the process here. 
Example: - - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) - ] - """ + when { + process { + """ + input[0] = [ + [ id:'test' ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) + ] + ] + input[1] = file(params.test_data['sarscov2']['metagenome']['prot_names_dmp'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['metagenome']['prot_nodes_dmp'], checkIfExists: true) + input[3] = [] // needs a merged.dmp + input[4] = file(params.modules_testdata_base_path + 'delete_me/metabuli/acc2taxid', checkIfExists: true) + """ + } } - } - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - //TODO nf-core: Add all required assertions to verify the test output. - ) + then { + assertAll( + { assert process.success }, + { assert snapshot (process.out).match()}, + ) + } } - - } - } diff --git a/modules/nf-core/metabuli/add/tests/main.nf.test.snap b/modules/nf-core/metabuli/add/tests/main.nf.test.snap new file mode 100644 index 00000000000..6d002ee38ea --- /dev/null +++ b/modules/nf-core/metabuli/add/tests/main.nf.test.snap @@ -0,0 +1,94 @@ +{ + "sarscov2 protein_db": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + [ + "694009.fna:md5,e73599798195a519ba2565c3f0275b93" + ], + [ + "acc2taxid:md5,4ae5ac3b865cb7bf301c3cc72153a7a8", + "merged.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", + "names.dmp:md5,130f9132095562e09c732679c562f5e9", + "nodes.dmp:md5,c471c27a4ce85ae74d2c63633c9ce1e3" + ], + "unmapped.txt:md5,b8c64bc37b7dc465fdaf61b3cd90c29a" + ] + ] + ], + "1": [ + "versions.yml:md5,2a8623d8302b84cba425ddf9381464d4" + ], + "db": [ + [ + { + "id": "test" + }, + [ + [ + "694009.fna:md5,e73599798195a519ba2565c3f0275b93" + ], + [ + 
"acc2taxid:md5,4ae5ac3b865cb7bf301c3cc72153a7a8", + "merged.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", + "names.dmp:md5,130f9132095562e09c732679c562f5e9", + "nodes.dmp:md5,c471c27a4ce85ae74d2c63633c9ce1e3" + ], + "unmapped.txt:md5,b8c64bc37b7dc465fdaf61b3cd90c29a" + ] + ] + ], + "versions": [ + "versions.yml:md5,2a8623d8302b84cba425ddf9381464d4" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T12:01:48.840753727" + }, + "sarscov2 protein_db stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "1": [ + "versions.yml:md5,2a8623d8302b84cba425ddf9381464d4" + ], + "db": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "versions": [ + "versions.yml:md5,2a8623d8302b84cba425ddf9381464d4" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T11:40:58.903080427" + } +} diff --git a/modules/nf-core/metabuli/build/environment.yml b/modules/nf-core/metabuli/build/environment.yml index b7c37e88c07..96f5393db66 100644 --- a/modules/nf-core/metabuli/build/environment.yml +++ b/modules/nf-core/metabuli/build/environment.yml @@ -1,6 +1,6 @@ -name: metabuli_build +name: "metabuli_build" channels: - conda-forge - bioconda dependencies: - - bioconda::metabuli=1.0.5 + - "bioconda::metabuli=1.0.5" diff --git a/modules/nf-core/metabuli/build/main.nf b/modules/nf-core/metabuli/build/main.nf index 4645babe3fe..8310467f935 100644 --- a/modules/nf-core/metabuli/build/main.nf +++ b/modules/nf-core/metabuli/build/main.nf @@ -1,48 +1,60 @@ process METABULI_BUILD { - tag 'build' + tag "$meta.id" label 'process_medium' - conda "bioconda::metabuli=1.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/metabuli:1.0.5--pl5321h6a68c12_1': 'biocontainers/metabuli:1.0.5--pl5321h6a68c12_1' }" input: - path(db) + tuple val(meta), path(db) + path accession2taxid, stageAs: 'taxonomy/acc2taxid' output: - path "metabuli_db.tar.gz", emit: db - path "versions.yml" , emit: versions + tuple val(meta), path("$prefix"), emit: db + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + acc2taxid = accession2taxid ? "${accession2taxid}" : "${db}/taxonomy/acc2taxid" """ + find ${db}/library -type f -name '*.fna' > library-files.txt - ls $db/library/* > lib.txt metabuli \\ build \\ - --threads $task.cpus \\ - $db \\ - lib.txt \\ - $acc2taxid \\ + ${db} \\ + library-files.txt \\ + ${acc2taxid} \\ + --db-name ${prefix} \\ + --threads ${task.cpus} \\ $args + if [[ \$(basename ${db}) != "${prefix}" ]]; then + mkdir -p ${prefix} + mv ${db}/* ${prefix} + fi + cat <<-END_VERSIONS > versions.yml "${task.process}": - metabuli: \$(metabuli | grep Version | sed 's/^metabuli Version: //';)) + metabuli: \$(metabuli version) END_VERSIONS """ stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" """ - touch metabuli_db.tar.gz + mkdir -p "$prefix" + cat <<-END_VERSIONS > versions.yml "${task.process}": - metabuli: \$(metabuli | grep Version | sed 's/^metabuli Version: //';)) + metabuli: \$(metabuli version) END_VERSIONS """ } diff --git a/modules/nf-core/metabuli/build/meta.yml b/modules/nf-core/metabuli/build/meta.yml index 14879d950f7..fe320627286 100644 --- a/modules/nf-core/metabuli/build/meta.yml +++ b/modules/nf-core/metabuli/build/meta.yml @@ -8,31 +8,24 @@ keywords: - database tools: - "metabuli": - description: "Metabuli: specific and sensitive metagenomic classification via joint analysis of DNA and amino acid" - homepage: "https://github.com/steineggerlab/Metabuli" - documentation: 
"https://github.com/steineggerlab/Metabuli#readme" - tool_dev_url: "https://github.com/steineggerlab/Metabuli" - doi: "10.1101/2023.05.31.543018" - licence: "['GPL v3']" + description: "Metabuli: specific and sensitive metagenomic classification via joint analysis of DNA and amino acid" + homepage: "https://github.com/steineggerlab/Metabuli" + documentation: "https://github.com/steineggerlab/Metabuli#readme" + tool_dev_url: "https://github.com/steineggerlab/Metabuli" + doi: "10.1101/2023.05.31.543018" + licence: "['GPL v3']" input: - - genomes: - type: file - description: Genome files that serve as entries in the database - pattern: "*.{fa,fasta,fna}" - - acc2taxid: - type: file - description: | - Accession number to taxid file. - A four column tsv with the following header: - accession, accession.revision, taxid, gi. + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` - db: - type: directory - description: | - Output folder name for the database. - If the taxonomy-path optional parameter is not set to a directory - containing names.dmp and nodes.dmp, this directory needs - to contain a folder named taxonomy containing those two files. 
+ type: directory + description: | + contains required files to build the database + [ taxonomy/names.dmp, taxonomy/nodes.dmp, taxonomy/merged.dmp, acc2taxid, library/*.fna ] output: - versions: @@ -42,7 +35,7 @@ output: - db: type: file description: Compressed metabuli database - pattern: "*.tar.gz" + pattern: "${prefix}/" authors: - "@TheOafidian" diff --git a/modules/nf-core/metabuli/build/tests/main.nf.test b/modules/nf-core/metabuli/build/tests/main.nf.test index e72cdba89b4..b4fb6dc4a79 100644 --- a/modules/nf-core/metabuli/build/tests/main.nf.test +++ b/modules/nf-core/metabuli/build/tests/main.nf.test @@ -1,228 +1,44 @@ -// nf-core modules test metabuli/build nextflow_process { name "Test Process METABULI_BUILD" script "../main.nf" process "METABULI_BUILD" - - tag "modules" - tag "modules_nfcore" + config "./nextflow.config" tag "metabuli" tag "metabuli/build" + tag "metabuli/add" + tag "modules" + tag "modules_nfcore" - - test("sarscov2 - paired-end illumina reads") { - - setup { - - run("SEQTK_MERGEPE") { - script "../../../seqtk/mergepe/main.nf" - config "./modules.config" - tag "seqtk/mergepe" - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] - ] - """ - } - } - - run("MERYL_COUNT") { - script "../../../meryl/count/main.nf" - config "./modules.config" - tag "meryl/count" - process { - """ - input[0] = SEQTK_MERGEPE.out.reads - input[1] = Channel.value(6) - """ - } - } - - - } - - when { + setup { + run("METABULI_ADD") { + script "modules/nf-core/metabuli/add/main.nf" process { """ input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - input[1] = MERYL_COUNT.out.meryl_db 
- input[2] = [] - input[3] = [] - input[4] = 10 - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.get(0), process.out.get(2)).match() }, - // The following asserts that the instable output stdout_log generated by the module is complete - { assert path(process.out.get(1).get(0)).readLines().last().contains("Bye!") } - ) - } - - } - - test("sarscov2 - paired-end illumina reads - stub") { - - options "-stub" - - setup { - - run("SEQTK_MERGEPE") { - script "../../../seqtk/mergepe/main.nf" - config "./modules.config" - tag "seqtk/mergepe" - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map + [ id:'test' ], [ - file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + file(params.modules_testdata_base_path +'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path +'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true), ] ] - """ - } - } - - run("MERYL_COUNT") { - script "../../../meryl/count/main.nf" - config "./modules.config" - tag "meryl/count" - process { - """ - input[0] = SEQTK_MERGEPE.out.reads - input[1] = Channel.value(6) - """ - } - } - - - } - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - input[1] = MERYL_COUNT.out.meryl_db - input[2] = [] - input[3] = [] - input[4] = 10 + input[1] = file(params.modules_testdata_base_path +'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path +'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true) + input[3] = [] // needs a merged.dmp + input[4] = file(params.modules_testdata_base_path + 
'delete_me/metabuli/acc2taxid', checkIfExists: true) """ } } - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 - paired-end illumina reads plus optional inputs lookup_table and seqmers") { - - setup { - - run("SEQTK_MERGEPE") { - script "../../../seqtk/mergepe/main.nf" - config "./modules.config" - tag "seqtk/mergepe" - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] - ] - """ - } - } - - run("MERYL_COUNT", alias: "MERYL_COUNT_READS") { - script "../../../meryl/count/main.nf" - config "./modules.config" - tag "meryl/count" - process { - """ - input[0] = SEQTK_MERGEPE.out.reads - input[1] = Channel.value(6) - """ - } - } - - run("MERYL_COUNT", alias: "MERYL_COUNT_ASM") { - script "../../../meryl/count/main.nf" - config "./modules.config" - tag "meryl/count" - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - input[1] = Channel.value(6) - """ - } - } - - run("MERYL_HISTOGRAM") { - script "../../../meryl/histogram/main.nf" - config "./modules.config" - tag "meryl/histogram" - process { - """ - input[0] = MERYL_COUNT_READS.out.meryl_db - input[1] = Channel.value(6) - """ - } - } - - run("GENOMESCOPE2") { - script "../../../genomescope2/main.nf" - config "./modules.config" - tag "genomescope2" - process { - """ - input[0] = MERYL_HISTOGRAM.out.hist - """ - } - } - - - - - } + test("sarscov2 protein_db") { when { process { """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) 
- ] - input[1] = MERYL_COUNT_READS.out.meryl_db - input[2] = GENOMESCOPE2.out.lookup_table.map{ meta, lkp -> lkp} - input[3] = MERYL_COUNT_ASM.out.meryl_db.map{ meta, meryl_db -> meryl_db } - input[4] = 10 + input[0] = METABULI_ADD.out.db + input[1] = [] """ } } @@ -230,228 +46,18 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.get(0), process.out.get(2)).match() }, - // The following asserts that the instable output stdout_log generated by the module is complete - { assert path(process.out.get(1).get(0)).readLines().last().contains("Bye!") } + { assert snapshot (process.out).match()}, ) } - } - test("sarscov2 - paired-end illumina reads plus inputs lookup_table and seqmers - stub") { - - options "-stub" - - setup { - - run("SEQTK_MERGEPE") { - script "../../../seqtk/mergepe/main.nf" - config "./modules.config" - tag "seqtk/mergepe" - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] - ] - """ - } - } - - run("MERYL_COUNT", alias: "MERYL_COUNT_READS") { - script "../../../meryl/count/main.nf" - config "./modules.config" - tag "meryl/count" - process { - """ - input[0] = SEQTK_MERGEPE.out.reads - input[1] = Channel.value(6) - """ - } - } - - run("MERYL_COUNT", alias: "MERYL_COUNT_ASM") { - script "../../../meryl/count/main.nf" - config "./modules.config" - tag "meryl/count" - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - input[1] = Channel.value(6) - """ - } - } - - run("MERYL_HISTOGRAM") { - script "../../../meryl/histogram/main.nf" - config "./modules.config" - tag "meryl/histogram" - process { - """ - 
input[0] = MERYL_COUNT_READS.out.meryl_db - input[1] = Channel.value(6) - """ - } - } - - run("GENOMESCOPE2") { - script "../../../genomescope2/main.nf" - config "./modules.config" - tag "genomescope2" - process { - """ - input[0] = MERYL_HISTOGRAM.out.hist - """ - } - } - - - - - } - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - input[1] = MERYL_COUNT_READS.out.meryl_db - input[2] = GENOMESCOPE2.out.lookup_table.map{ meta, lkp -> lkp } - input[3] = MERYL_COUNT_ASM.out.meryl_db.map{ meta, meryl_db -> meryl_db } - input[4] = 10 - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() }, - ) - } - - } - - test("Multiple samples - sarscov2 + bacteroides_fragilis - paired-end illumina reads") { - - setup { - - run("SEQTK_MERGEPE") { - script "../../../seqtk/mergepe/main.nf" - config "./modules.config" - tag "seqtk/mergepe" - process { - """ - input[0] = Channel.from([ - [ id:'test_sars', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] - ], [ - [ id:'test_fragilis', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz', checkIfExists: true) - ] - ]) - """ - } - } - - run("MERYL_COUNT", alias: "MERYL_COUNT_READS") { - script "../../../meryl/count/main.nf" - config "./modules.config" - tag "meryl/count" - process { - """ - input[0] = SEQTK_MERGEPE.out.reads - input[1] = Channel.value(6) - """ - } - } - - 
run("MERYL_COUNT", alias: "MERYL_COUNT_ASM") { - script "../../../meryl/count/main.nf" - config "./modules.config" - tag "meryl/count" - process { - """ - input[0] = Channel.from([ - [ id:'test_sars', single_end:false ], // meta map - file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ],[ - [ id:'test_fragilis', single_end:false ], // meta map - file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) - ]) - input[1] = Channel.value(6) - """ - } - } - - run("MERYL_HISTOGRAM") { - script "../../../meryl/histogram/main.nf" - config "./modules.config" - tag "meryl/histogram" - process { - """ - input[0] = MERYL_COUNT_READS.out.meryl_db - input[1] = Channel.value(6) - """ - } - } - - run("GENOMESCOPE2") { - script "../../../genomescope2/main.nf" - config "./modules.config" - tag "genomescope2" - process { - """ - input[0] = MERYL_HISTOGRAM.out.hist - """ - } - } - - - - - } + test("sarscov2 - protein_db - acc2taxid") { when { process { """ - Channel.from([ - [ id:'test_sars', single_end:false ], // meta map - file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ],[ - [ id:'test_fragilis', single_end:false ], // meta map - file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) - ]) - .set{ input_zero_ch } - - input_zero_ch.join( MERYL_COUNT_READS.out.meryl_db ) - .join( MERYL_COUNT_ASM.out.meryl_db ) - .join( GENOMESCOPE2.out.lookup_table ) - .multiMap{ meta, asm, meryl_db, asm_meryl_db, lookup_table -> - asm_ch: [meta, asm] - meryl_ch: [meta, meryl_db] - asm_meryl_ch: [asm_meryl_db] - lk_ch: [lookup_table] - }.set{ multimap_inputs_ch } - - input[0] = multimap_inputs_ch.asm_ch - input[1] = multimap_inputs_ch.meryl_ch - input[2] = multimap_inputs_ch.lk_ch - input[3] = multimap_inputs_ch.asm_meryl_ch - input[4] = 10 + 
input[0] = METABULI_ADD.out.db + input[1] = file(params.modules_testdata_base_path + 'delete_me/metabuli/acc2taxid', checkIfExists: true) """ } } @@ -459,129 +65,20 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.get(0), process.out.get(2)).match() }, - // The following asserts that the output stdout_log generated by the module is complete - { assert path(process.out.get(1).get(0)).readLines().last().contains("Bye!") } + { assert snapshot (process.out).match()}, ) } - } - test("Multiple samples - sarscov2 + bacteroides_fragilis - paired-end illumina reads - stub") { + test("sarscov2 protein_db stub") { options "-stub" - setup { - - run("SEQTK_MERGEPE") { - script "../../../seqtk/mergepe/main.nf" - config "./modules.config" - tag "seqtk/mergepe" - process { - """ - input[0] = Channel.from([ - [ id:'test_sars', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + '/genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] - ], [ - [ id:'test_fragilis', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz', checkIfExists: true) - ] - ]) - """ - } - } - - run("MERYL_COUNT", alias: "MERYL_COUNT_READS") { - script "../../../meryl/count/main.nf" - config "./modules.config" - tag "meryl/count" - process { - """ - input[0] = SEQTK_MERGEPE.out.reads - input[1] = Channel.value(6) - """ - } - } - - run("MERYL_COUNT", alias: "MERYL_COUNT_ASM") { - script "../../../meryl/count/main.nf" - config "./modules.config" - tag "meryl/count" - process { - """ - input[0] = Channel.from([ - [ id:'test_sars', single_end:false ], // meta map 
- file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ],[ - [ id:'test_fragilis', single_end:false ], // meta map - file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) - ]) - input[1] = Channel.value(6) - """ - } - } - - run("MERYL_HISTOGRAM") { - script "../../../meryl/histogram/main.nf" - config "./modules.config" - tag "meryl/histogram" - process { - """ - input[0] = MERYL_COUNT_READS.out.meryl_db - input[1] = Channel.value(6) - """ - } - } - - run("GENOMESCOPE2") { - script "../../../genomescope2/main.nf" - config "./modules.config" - tag "genomescope2" - process { - """ - input[0] = MERYL_HISTOGRAM.out.hist - """ - } - } - - - - - } - when { process { """ - Channel.from([ - [ id:'test_sars', single_end:false ], // meta map - file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ],[ - [ id:'test_fragilis', single_end:false ], // meta map - file(params.modules_testdata_base_path + '/genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) - ]) - .set{ input_zero_ch } - - - input_zero_ch.join( MERYL_COUNT_READS.out.meryl_db ) - .join( MERYL_COUNT_ASM.out.meryl_db ) - .join( GENOMESCOPE2.out.lookup_table ) - .multiMap{ meta, asm, meryl_db, asm_meryl_db, lookup_table -> - asm_ch: [meta, asm] - meryl_ch: [meta, meryl_db] - asm_meryl_ch: [asm_meryl_db] - lk_ch: [lookup_table] - }.set{ multimap_inputs_ch } - - input[0] = multimap_inputs_ch.asm_ch - input[1] = multimap_inputs_ch.meryl_ch - input[2] = multimap_inputs_ch.lk_ch - input[3] = MERYL_COUNT_ASM.out.meryl_db.map{ meta, meryl_db -> meryl_db } - input[4] = 10 + input[0] = METABULI_ADD.out.db + input[1] = [] """ } } @@ -589,9 +86,8 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.get(0), process.out.get(2)).match() } + { assert snapshot 
(process.out).match()} ) } - } } diff --git a/modules/nf-core/metabuli/build/tests/main.nf.test.snap b/modules/nf-core/metabuli/build/tests/main.nf.test.snap index 2d1f4e9da8f..74f66077987 100644 --- a/modules/nf-core/metabuli/build/tests/main.nf.test.snap +++ b/modules/nf-core/metabuli/build/tests/main.nf.test.snap @@ -1,184 +1,177 @@ { - "Multiple samples - sarscov2 + bacteroides_fragilis - paired-end illumina reads": { - "content": [ - [ - [ - { - "id": "test_fragilis", - "single_end": false - }, - "test_fragilis.hist:md5,3380e6f8b82739edc44b45ac6b50525e" - ], - [ - { - "id": "test_sars", - "single_end": false - }, - "test_sars.hist:md5,564c5dc22b9a10041b248405bb2b15d0" - ] - ], - [ - "versions.yml:md5,078b89694148fc55475e89987173ac4e", - "versions.yml:md5,078b89694148fc55475e89987173ac4e" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-04-02T11:05:51.416187" - }, - "sarscov2 - paired-end illumina reads - stub": { + "sarscov2 protein_db": { "content": [ { "0": [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "test.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "acc2taxid.map:md5,4eb8b3bde8a5cc1414afeeeab385e208", + "db.parameters:md5,481509eb7cddc0ec93fbcd61b7820616", + "diffIdx:md5,8ac88d03fabf6917698a02841e410b33", + "info:md5,ba18c68b0d90630fad4dfe02b5101b72", + [ + "694009.fna:md5,e73599798195a519ba2565c3f0275b93" + ], + "split:md5,6306fc471e89174e65c71fd0cc69f780", + "taxID_list:md5,40064b0651b7d558820d6fc1014de0fd", + [ + "acc2taxid:md5,4ae5ac3b865cb7bf301c3cc72153a7a8", + "merged.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", + "names.dmp:md5,130f9132095562e09c732679c562f5e9", + "nodes.dmp:md5,c471c27a4ce85ae74d2c63633c9ce1e3" + ], + "taxonomyDB:md5,029a03295afff91c5d2a011c7e8b8db5", + "unmapped.txt:md5,b8c64bc37b7dc465fdaf61b3cd90c29a" + ] ] ], "1": [ - "test.hist.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "2": [ - "versions.yml:md5,078b89694148fc55475e89987173ac4e" + 
"versions.yml:md5,acaa33b47a40006743150a1f9db99393" ], - "hist": [ + "db": [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "test.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "acc2taxid.map:md5,4eb8b3bde8a5cc1414afeeeab385e208", + "db.parameters:md5,481509eb7cddc0ec93fbcd61b7820616", + "diffIdx:md5,8ac88d03fabf6917698a02841e410b33", + "info:md5,ba18c68b0d90630fad4dfe02b5101b72", + [ + "694009.fna:md5,e73599798195a519ba2565c3f0275b93" + ], + "split:md5,6306fc471e89174e65c71fd0cc69f780", + "taxID_list:md5,40064b0651b7d558820d6fc1014de0fd", + [ + "acc2taxid:md5,4ae5ac3b865cb7bf301c3cc72153a7a8", + "merged.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", + "names.dmp:md5,130f9132095562e09c732679c562f5e9", + "nodes.dmp:md5,c471c27a4ce85ae74d2c63633c9ce1e3" + ], + "taxonomyDB:md5,029a03295afff91c5d2a011c7e8b8db5", + "unmapped.txt:md5,b8c64bc37b7dc465fdaf61b3cd90c29a" + ] ] ], - "log_stderr": [ - "test.hist.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ], "versions": [ - "versions.yml:md5,078b89694148fc55475e89987173ac4e" + "versions.yml:md5,acaa33b47a40006743150a1f9db99393" ] } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-04-02T11:04:28.744584" + "timestamp": "2024-06-21T13:06:57.889836817" }, - "sarscov2 - paired-end illumina reads plus inputs lookup_table and seqmers - stub": { + "sarscov2 - protein_db - acc2taxid": { "content": [ { "0": [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "test.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "acc2taxid.map:md5,4eb8b3bde8a5cc1414afeeeab385e208", + "db.parameters:md5,481509eb7cddc0ec93fbcd61b7820616", + "diffIdx:md5,8ac88d03fabf6917698a02841e410b33", + "info:md5,ba18c68b0d90630fad4dfe02b5101b72", + [ + "694009.fna:md5,e73599798195a519ba2565c3f0275b93" + ], + "split:md5,6306fc471e89174e65c71fd0cc69f780", + "taxID_list:md5,40064b0651b7d558820d6fc1014de0fd", + [ + "acc2taxid:md5,4ae5ac3b865cb7bf301c3cc72153a7a8", + 
"merged.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", + "names.dmp:md5,130f9132095562e09c732679c562f5e9", + "nodes.dmp:md5,c471c27a4ce85ae74d2c63633c9ce1e3" + ], + "taxonomyDB:md5,8efd02b34af565a1a808e43bc239c75a", + "unmapped.txt:md5,b8c64bc37b7dc465fdaf61b3cd90c29a" + ] ] ], "1": [ - "test.hist.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "2": [ - "versions.yml:md5,078b89694148fc55475e89987173ac4e" + "versions.yml:md5,acaa33b47a40006743150a1f9db99393" ], - "hist": [ + "db": [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "test.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "acc2taxid.map:md5,4eb8b3bde8a5cc1414afeeeab385e208", + "db.parameters:md5,481509eb7cddc0ec93fbcd61b7820616", + "diffIdx:md5,8ac88d03fabf6917698a02841e410b33", + "info:md5,ba18c68b0d90630fad4dfe02b5101b72", + [ + "694009.fna:md5,e73599798195a519ba2565c3f0275b93" + ], + "split:md5,6306fc471e89174e65c71fd0cc69f780", + "taxID_list:md5,40064b0651b7d558820d6fc1014de0fd", + [ + "acc2taxid:md5,4ae5ac3b865cb7bf301c3cc72153a7a8", + "merged.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", + "names.dmp:md5,130f9132095562e09c732679c562f5e9", + "nodes.dmp:md5,c471c27a4ce85ae74d2c63633c9ce1e3" + ], + "taxonomyDB:md5,8efd02b34af565a1a808e43bc239c75a", + "unmapped.txt:md5,b8c64bc37b7dc465fdaf61b3cd90c29a" + ] ] ], - "log_stderr": [ - "test.hist.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ], "versions": [ - "versions.yml:md5,078b89694148fc55475e89987173ac4e" + "versions.yml:md5,acaa33b47a40006743150a1f9db99393" ] } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-04-02T11:04:51.831185" - }, - "sarscov2 - paired-end illumina reads plus optional inputs lookup_table and seqmers": { - "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.hist:md5,564c5dc22b9a10041b248405bb2b15d0" - ] - ], - [ - "versions.yml:md5,078b89694148fc55475e89987173ac4e" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": 
"24.04.2" }, - "timestamp": "2024-04-02T11:04:41.739829" + "timestamp": "2024-06-21T13:08:00.235429024" }, - "Multiple samples - sarscov2 + bacteroides_fragilis - paired-end illumina reads - stub": { + "sarscov2 protein_db stub": { "content": [ - [ - [ - { - "id": "test_fragilis", - "single_end": false - }, - "test_fragilis.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + { + "0": [ + [ + { + "id": "test" + }, + [ + + ] + ] ], - [ - { - "id": "test_sars", - "single_end": false - }, - "test_sars.hist:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - [ - "versions.yml:md5,078b89694148fc55475e89987173ac4e", - "versions.yml:md5,078b89694148fc55475e89987173ac4e" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-04-02T11:06:03.566853" - }, - "sarscov2 - paired-end illumina reads": { - "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.hist:md5,e722be64a7fffeec654a10bea92ec619" + "1": [ + "versions.yml:md5,acaa33b47a40006743150a1f9db99393" + ], + "db": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "versions": [ + "versions.yml:md5,acaa33b47a40006743150a1f9db99393" ] - ], - [ - "versions.yml:md5,078b89694148fc55475e89987173ac4e" - ] + } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-04-02T11:04:20.733553" + "timestamp": "2024-06-21T13:08:14.59465885" } } \ No newline at end of file diff --git a/modules/nf-core/metabuli/build/tests/nextflow.config b/modules/nf-core/metabuli/build/tests/nextflow.config new file mode 100644 index 00000000000..32a9775a4b1 --- /dev/null +++ b/modules/nf-core/metabuli/build/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: METABULI_BUILD { + memory = 12.GB + ext.args = '--db-date 2024-6-21' + } +} diff --git a/modules/nf-core/metabuli/classify/environment.yml b/modules/nf-core/metabuli/classify/environment.yml index 78c213cdfea..ce1ebe71f5b 100644 --- a/modules/nf-core/metabuli/classify/environment.yml +++ 
b/modules/nf-core/metabuli/classify/environment.yml @@ -3,4 +3,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::metabuli=1.0.5 + - "bioconda::metabuli=1.0.5" diff --git a/modules/nf-core/metabuli/classify/main.nf b/modules/nf-core/metabuli/classify/main.nf index 02096f785c9..e0994ea975b 100644 --- a/modules/nf-core/metabuli/classify/main.nf +++ b/modules/nf-core/metabuli/classify/main.nf @@ -2,19 +2,19 @@ process METABULI_CLASSIFY { tag "$meta.id" label 'process_medium' - conda "bioconda::metabuli=1.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/metabuli:1.0.5--pl5321h6a68c12_1': 'biocontainers/metabuli:1.0.5--pl5321h6a68c12_1' }" input: tuple val(meta), path(fastas) - path(db) + tuple val(meta2), path(db) output: - tuple val(meta), path("*/*_classifications.tsv"), emit: classification - tuple val(meta), path("*/*_report.tsv") , emit: report - path "versions.yml" , emit: versions + tuple val(meta), path("*classifications.tsv"), emit: classification + tuple val(meta), path("*report.tsv") , emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -22,31 +22,33 @@ process METABULI_CLASSIFY { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def is_compressed = meta.single_end ? fastas.getName().endsWith(".gz") : fastas[0].getName().endsWith(".gz") - def input = meta.single_end ? "--seq-mode 1 ${fastas}" : "${fastas[0]} ${fastas[1]}" - if (is_compressed && meta.single_end) { - input = "--seq-mode 1 ${fastas.baseName}" - } else if (is_compressed) { - input = "${fastas[0].baseName} ${fastas[1].baseName}" - } - """ - if [ "$is_compressed" == "true" ]; then - gzip -d *.gz - fi - metabuli \\ classify \\ $args \\ + ${fastas} \\ --threads $task.cpus \\ - ${input} \\ + --max-ram ${task.memory.toGiga()} \\ ${db} \\ - ${prefix}_out \\ + . 
\\ ${prefix} cat <<-END_VERSIONS > versions.yml "${task.process}": - metabuli: \$(metabuli | grep Version | sed 's/^metabuli Version: //';)) + metabuli: \$(metabuli version) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch "${prefix}_classifications.tsv" + touch "${prefix}_report.tsv" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + metabuli: \$(metabuli version) END_VERSIONS """ } diff --git a/modules/nf-core/metabuli/classify/tests/main.nf.test b/modules/nf-core/metabuli/classify/tests/main.nf.test new file mode 100644 index 00000000000..35a3f7e9ce5 --- /dev/null +++ b/modules/nf-core/metabuli/classify/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_process { + name "Test Process METABULI_CLASSIFY" + script "../main.nf" + process "METABULI_CLASSIFY" + tag "modules" + tag "modules_nfcore" + tag "untar" + tag "metabuli" + tag "metabuli/classify" + + setup { + run("UNTAR") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + "delete_me/metabuli/metabuli_db.tar.gz", checkIfExists: true) + ]) + """ + } + } + } + + test("sarscov2 illumina single end [fastq]") { + config "./nextflow-se.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file( + params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], + checkIfExists: true + )] + ] + input[1] = UNTAR.out.untar + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match()}, + ) + } + } + + test("sarscov2 illumina paired end [fastq]") { + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file( + params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], + checkIfExists: true + ), + file( + params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], + checkIfExists: true + ) + + ] + ] + input[1] = UNTAR.out.untar 
+ """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match()}, + ) + } + } +} diff --git a/modules/nf-core/metabuli/classify/tests/main.nf.test.snap b/modules/nf-core/metabuli/classify/tests/main.nf.test.snap new file mode 100644 index 00000000000..a81aa198e67 --- /dev/null +++ b/modules/nf-core/metabuli/classify/tests/main.nf.test.snap @@ -0,0 +1,82 @@ +{ + "sarscov2 illumina single end [fastq]": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + + ], + "classification": [ + + ], + "report": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T13:33:51.151143139" + }, + "sarscov2 illumina paired end [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.kraken2.report.txt:md5,4227755fe40478b8d7dc8634b489761e" + ] + ], + [ + "versions.yml:md5,79adf2ca1cfc625cb77e391b27142c43" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-04T18:47:13.75649" + }, + "sarscov2 illumina single end [fastq] + save_reads_assignment": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.kraken2.report.txt:md5,4227755fe40478b8d7dc8634b489761e" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.kraken2.classifiedreads.txt:md5,e7a90531f0d8d777316515c36fe4cae0" + ] + ], + [ + "versions.yml:md5,79adf2ca1cfc625cb77e391b27142c43" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-04T18:47:22.459465" + } +} \ No newline at end of file diff --git a/modules/nf-core/metabuli/classify/tests/nextflow-se.config b/modules/nf-core/metabuli/classify/tests/nextflow-se.config new file mode 100644 index 00000000000..f8aca203f12 --- /dev/null +++ b/modules/nf-core/metabuli/classify/tests/nextflow-se.config @@ -0,0 +1,5 @@ +process { + withName: METABULI_CLASSIFY { + ext.args = '--seq-mode 1' + } +} diff 
--git a/modules/nf-core/metabuli/classify/tests/tags.yml b/modules/nf-core/metabuli/classify/tests/tags.yml new file mode 100644 index 00000000000..9ebfd7ab6ee --- /dev/null +++ b/modules/nf-core/metabuli/classify/tests/tags.yml @@ -0,0 +1,3 @@ +kraken2/kraken2: + - modules/nf-core/kraken2/kraken2/** + - modules/nf-core/untar/** From d159b8c903104f3be72e61ff898674b1422e2325 Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Wed, 3 Jul 2024 13:05:53 +0000 Subject: [PATCH 20/20] fix linting & resource requirements --- modules/nf-core/metabuli/add/environment.yml | 1 + modules/nf-core/metabuli/add/meta.yml | 2 +- .../nf-core/metabuli/build/environment.yml | 1 + modules/nf-core/metabuli/build/meta.yml | 2 +- .../metabuli/build/tests/nextflow.config | 2 +- .../nf-core/metabuli/classify/environment.yml | 1 + modules/nf-core/metabuli/classify/meta.yml | 2 +- .../metabuli/classify/tests/main.nf.test | 35 ++++++++++++++----- 8 files changed, 33 insertions(+), 13 deletions(-) diff --git a/modules/nf-core/metabuli/add/environment.yml b/modules/nf-core/metabuli/add/environment.yml index 0e5eef64f23..492ffab0470 100644 --- a/modules/nf-core/metabuli/add/environment.yml +++ b/modules/nf-core/metabuli/add/environment.yml @@ -2,5 +2,6 @@ name: "metabuli_add" channels: - conda-forge - bioconda + - defaults dependencies: - "bioconda::metabuli=1.0.5" diff --git a/modules/nf-core/metabuli/add/meta.yml b/modules/nf-core/metabuli/add/meta.yml index c8a4e70f0a1..f7a9f78f66c 100644 --- a/modules/nf-core/metabuli/add/meta.yml +++ b/modules/nf-core/metabuli/add/meta.yml @@ -13,7 +13,7 @@ tools: documentation: "https://github.com/steineggerlab/Metabuli#readme" tool_dev_url: "https://github.com/steineggerlab/Metabuli" doi: "10.1101/2023.05.31.543018" - licence: "['GPL v3']" + licence: ['GPL v3'] input: - meta: diff --git a/modules/nf-core/metabuli/build/environment.yml b/modules/nf-core/metabuli/build/environment.yml index 96f5393db66..b165712d709 100644 --- 
a/modules/nf-core/metabuli/build/environment.yml +++ b/modules/nf-core/metabuli/build/environment.yml @@ -2,5 +2,6 @@ name: "metabuli_build" channels: - conda-forge - bioconda + - defaults dependencies: - "bioconda::metabuli=1.0.5" diff --git a/modules/nf-core/metabuli/build/meta.yml b/modules/nf-core/metabuli/build/meta.yml index fe320627286..cc77e4fa5a9 100644 --- a/modules/nf-core/metabuli/build/meta.yml +++ b/modules/nf-core/metabuli/build/meta.yml @@ -13,7 +13,7 @@ tools: documentation: "https://github.com/steineggerlab/Metabuli#readme" tool_dev_url: "https://github.com/steineggerlab/Metabuli" doi: "10.1101/2023.05.31.543018" - licence: "['GPL v3']" + licence: ['GPL v3'] input: - meta: diff --git a/modules/nf-core/metabuli/build/tests/nextflow.config b/modules/nf-core/metabuli/build/tests/nextflow.config index 32a9775a4b1..d1468cac635 100644 --- a/modules/nf-core/metabuli/build/tests/nextflow.config +++ b/modules/nf-core/metabuli/build/tests/nextflow.config @@ -1,6 +1,6 @@ process { withName: METABULI_BUILD { - memory = 12.GB + memory = 7.GB ext.args = '--db-date 2024-6-21' } } diff --git a/modules/nf-core/metabuli/classify/environment.yml b/modules/nf-core/metabuli/classify/environment.yml index ce1ebe71f5b..abb788f31a2 100644 --- a/modules/nf-core/metabuli/classify/environment.yml +++ b/modules/nf-core/metabuli/classify/environment.yml @@ -2,5 +2,6 @@ name: metabuli_classify channels: - conda-forge - bioconda + - defaults dependencies: - "bioconda::metabuli=1.0.5" diff --git a/modules/nf-core/metabuli/classify/meta.yml b/modules/nf-core/metabuli/classify/meta.yml index fd910db5c21..3dcd9e99219 100644 --- a/modules/nf-core/metabuli/classify/meta.yml +++ b/modules/nf-core/metabuli/classify/meta.yml @@ -15,7 +15,7 @@ tools: documentation: "https://github.com/steineggerlab/Metabuli#readme" tool_dev_url: "https://github.com/steineggerlab/Metabuli" doi: "10.1101/2023.05.31.543018" - licence: "['GPL v3']" + licence: ['GPL v3'] input: - meta: diff --git 
a/modules/nf-core/metabuli/classify/tests/main.nf.test b/modules/nf-core/metabuli/classify/tests/main.nf.test index 35a3f7e9ce5..38b79ff54ca 100644 --- a/modules/nf-core/metabuli/classify/tests/main.nf.test +++ b/modules/nf-core/metabuli/classify/tests/main.nf.test @@ -7,16 +7,34 @@ nextflow_process { tag "untar" tag "metabuli" tag "metabuli/classify" + tag "metabuli/add" + tag "metabuli/build" setup { - run("UNTAR") { - script "modules/nf-core/untar/main.nf" + run("METABULI_ADD") { + script "modules/nf-core/metabuli/add/main.nf" process { """ - input[0] = Channel.of([ - [], - file(params.modules_testdata_base_path + "delete_me/metabuli/metabuli_db.tar.gz", checkIfExists: true) - ]) + input[0] = [ + [ id:'test' ], + [ + file(params.modules_testdata_base_path +'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path +'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true), + ] + ] + input[1] = file(params.modules_testdata_base_path +'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path +'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true) + input[3] = [] // needs a merged.dmp + input[4] = file(params.modules_testdata_base_path + 'delete_me/metabuli/acc2taxid', checkIfExists: true) + """ + } + } + run("METABULI_BUILD") { + script "modules/nf-core/metabuli/build/main.nf" + process { + """ + input[0] = METABULI_ADD.out.db + input[1] = [] """ } } @@ -34,7 +52,7 @@ nextflow_process { checkIfExists: true )] ] - input[1] = UNTAR.out.untar + input[1] = METABULI_BUILD.out.db """ } } @@ -49,7 +67,6 @@ nextflow_process { test("sarscov2 illumina paired end [fastq]") { when { - process { """ input[0] = [ @@ -66,7 +83,7 @@ nextflow_process { ] ] - input[1] = UNTAR.out.untar + input[1] = METABULI_BUILD.out.db """ } }