-
Notifications
You must be signed in to change notification settings - Fork 15
Expand file tree
/
Copy pathnextflow.config
More file actions
426 lines (380 loc) · 15.7 KB
/
nextflow.config
File metadata and controls
426 lines (380 loc) · 15.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
h3abionet/TADA Nextflow config file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Default config options for all compute environments
----------------------------------------------------------------------------------------
*/
// Global default params, used in configs
// These are the pipeline-wide defaults; any of them can be overridden on the
// command line (--param value), via -params-file, or by an institutional config.
params {
// Required pipeline input: path to the sample sheet (no default; must be supplied)
input = null
// MultiQC report customization (all optional)
multiqc_config = null
multiqc_title = null
multiqc_logo = null
max_multiqc_email_size = '25.MB'
multiqc_methods_description = null
// TODO: these aren't used at the moment. With some it's possible
// to use the name as a key to preset parameters, and with others
// run a Figaro-like step in preqc to evaluate reads and determine
// potential downstream settings.
amplicon = "16S"
amplicon_name = ""
min_predicted_length = 0
max_predicted_length = 0
paired_type = "overlapping" // "full_length", "overlapping", "dovetail", "mix", "nonoverlapping"
// TODO: platform is used in a generally hacky way here, but we
// could use this to possibly preset some parameters
platform = "illumina" // "illumina", "pacbio"; ONT, 454, Element, others may be added
// TODO: remove and determine on the fly from the sample sheet
strategy = "paired" // method for trimming and denoising reads; "single" or "paired"
// QC
preqc_only = false
preqc_samples = 0
skip_FASTQC = false
skip_dadaQC = false
skip_MultiQC = false
skip_merging_check = true
// TODO: This one needs a related graph; for an example see:
// https://github.com/benjjneb/dada2/issues/236#issuecomment-422865307
skip_ee_check = true
// Trimming
skip_trimming = false
// NOTE(review): presumably "dada2" or "cutadapt" given the cutadapt_* options
// below — confirm the accepted values against the trimming subworkflow.
trimmer = "dada2"
// when true (default), this sets cutadapt's trimming
// (which uses linked adapters) to require
// *both* primers be present.
// With some kits like StrainID this can be an issue (can have
// some truncated reads at the 5' or 3' end) and so can be relaxed
// by setting to false.
cutadapt_strict_match = true
// for paired end data only: set if there is a potential for R1
// to sequence into the 5' primer for R2 (e.g., as seen with ITS)
cutadapt_dovetail = false
// required for cutadapt
for_primer = ""
rev_primer = ""
// general trimming settings
trim_for = 0
trim_rev = 0
trunc_for = 0
trunc_rev = 0
// read filtering
// cutadapt only has one setting, takes the max of the two
maxEE_for = 2
maxEE_rev = 2
truncQ = 2
maxN = 0
min_read_len = 50
max_read_len = 10000
// a general flag for trimming with Illumina modern two-color sequencing
illumina_twocolor = false
// I think we can make these bool 'false' as above with R coersion (either through as.logical or using optparse in a Rscript)
rmPhiX = false
// learnErrors options
// TODO: deprecate quality_binning in favor of models
// Set to true if using binned qualities (NovaSeq, PacBio Revio)
quality_binning = false
// NYI
// this should be checked at the beginning
learnerrors_function = "loessErrfun"
// loessErrfun, PacBioErrfun, makeBinnedQualErrfun, noqualErrfun, custom
// this should be checked at the beginning if error_function="custom"
learnerrors_custom_code = ""
// this is currently required to be set if makeBinnedQualErrfun is set
learnerrors_quality_bins = ""
// TODO: some of the common ones overlap with dada options,
// so we should try finding some way to make this simpler
// between the two
learnerrors_opts = ""
/* DADA function options */
// The prior version allowed for key-val pairings, but
// these pairings in Groovy don't always translate well
// to R parameters, so we use simple strings for now
// TODO: needs to be fixed
dada_opts = ""
// ASV inference pooling
pool = "pseudo"
for_priors = ""
rev_priors = ""
// Merging
min_overlap = 20
max_mismatch = 0
trim_overhang = false
just_concatenate = false
rescue_unmerged = false
// Pre-chimera sequence tables. This pulls in one or more sequence tables
// from independent sequencing runs, merges them, and runs
// downstream analysis. The only supported sequence table format
// is the original version from DADA2 (ASV names are the
// sequence, with counts per sample). As these are run through
// chimera detection, these should be pre-chimera removal data.
// seq_tables = false
// Chimera detection
skip_chimera_detection = false
removeBimeraDenovo_options = ""
// General ASV filtering
// NOTE(review): comments say "set > 1" but the parameter names suggest any
// positive length should activate the filter — confirm against the filter step.
min_asv_len = 0 // Only run if set > 1
max_asv_len = 0 // Only run if set > 1
// Search-based filtering
// This is still alpha!!!
search_filter = "none" // currently only "mmseqs"
search_filter_dryrun = true
mmseqs_method = "search" // search; profile, taxonomy TBI
mmseqs_args = ""
mmseqs_fasta = "" // FASTA sequences to format
mmseqs_database = "" // path to database with prefix name
infernal_model = "" // NYI
tax_filter = false
tax_filter_rank = "Phylum"
// other options to be added when needed
// Taxonomic assignment
// TODO: set flag to skip these explicitly
// skip_taxonomic_assignment = false
tax_assignment_method = 'rdp'
reference = ""
species = ""
min_boot = 50
tax_ranks = ""
// batch size of ASVs to run through assignTaxonomy/assignSpecies, 0 = run everything
tax_batch = 0
// Multiple Sequence Alignment
skip_alignment = false
aligner = 'DECIPHER' // default
// infernalCM = false
// Phylogenetic analysis, requires MSA above
skip_tree = false
phylo_tool = 'fasttree' // default, current alternative is 'phangorn'
// MultiQC
// additional outputs
to_BIOM = true // generate BIOM v1 output
to_QIIME2 = true // generate QZA artifacts for QIIME2
// Renaming
id_type = "md5" // simple, md5; others may be added
// other parameters
// seed passed to downstream tools for reproducibility
random_seed = 100
// Boilerplate options (standard nf-core template parameters)
outdir = null
publish_dir_mode = 'copy'
email = null
email_on_fail = null
plaintext_email = false
monochrome_logs = false
hook_url = null
help = false
version = false
// Config options
config_profile_name = null
config_profile_description = null
custom_config_version = 'master'
// base URL for the shared nf-core institutional configs, pinned to the version above
custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
config_profile_contact = null
config_profile_url = null
// Max resource options
// Defaults only, expecting to be overwritten
// These are the ceilings enforced by check_max() at the bottom of this file.
max_memory = '128.GB'
max_cpus = 16
max_time = '240.h'
// Schema validation default options (consumed by the nf-validation plugin)
validationFailUnrecognisedParams = false
validationLenientMode = false
validationSchemaIgnoreParams = 'genomes,igenomes_base'
validationShowHiddenParams = false
validate_params = true
}
// Load base.config by default for all pipelines
includeConfig 'conf/base.config'
// Load nf-core custom profiles from different Institutions
// NOTE: this fetches a remote config over the network; failure (e.g. offline
// runs) is deliberately non-fatal — a warning is printed and defaults are used.
try {
includeConfig "${params.custom_config_base}/nfcore_custom.config"
} catch (Exception e) {
System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
}
// Load h3abionet/TADA custom profiles from different institutions.
// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs!
// try {
// includeConfig "${params.custom_config_base}/pipeline/tada.config"
// } catch (Exception e) {
// System.err.println("WARNING: Could not load nf-core/config/tada profiles: ${params.custom_config_base}/pipeline/tada.config")
// }
// Execution profiles: select one (or more, comma-separated) with `-profile`.
// Each software-provisioning profile enables exactly one backend and disables
// all of the others so they cannot conflict.
profiles {
    debug {
        dumpHashes = true
        process.beforeScript = 'echo $HOSTNAME'
        cleanup = false
        nextflow.enable.configProcessNamesValidation = true
    }
    conda {
        conda.enabled = true
        docker.enabled = false
        singularity.enabled = false
        podman.enabled = false
        shifter.enabled = false
        charliecloud.enabled = false
        // FIX: this was previously a bare `channels = [...]`, which is not a
        // recognized Nextflow config option and was silently ignored — the
        // channel list must live under the `conda` scope. The 'defaults'
        // channel is also dropped (deprecated per Anaconda ToS; removed from
        // nf-core templates).
        conda.channels = ['conda-forge', 'bioconda']
        apptainer.enabled = false
    }
    mamba {
        conda.enabled = true
        conda.useMamba = true
        docker.enabled = false
        singularity.enabled = false
        podman.enabled = false
        shifter.enabled = false
        charliecloud.enabled = false
        apptainer.enabled = false
    }
    docker {
        docker.enabled = true
        conda.enabled = false
        singularity.enabled = false
        podman.enabled = false
        shifter.enabled = false
        charliecloud.enabled = false
        apptainer.enabled = false
        // run containers as the invoking user so output files are not root-owned
        docker.runOptions = '-u $(id -u):$(id -g)'
    }
    arm {
        // forces amd64 images; on ARM hosts these run under emulation
        docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
    }
    singularity {
        singularity.enabled = true
        singularity.autoMounts = true
        conda.enabled = false
        docker.enabled = false
        podman.enabled = false
        shifter.enabled = false
        charliecloud.enabled = false
        apptainer.enabled = false
    }
    podman {
        podman.enabled = true
        conda.enabled = false
        docker.enabled = false
        singularity.enabled = false
        shifter.enabled = false
        charliecloud.enabled = false
        apptainer.enabled = false
    }
    shifter {
        shifter.enabled = true
        conda.enabled = false
        docker.enabled = false
        singularity.enabled = false
        podman.enabled = false
        charliecloud.enabled = false
        apptainer.enabled = false
    }
    charliecloud {
        charliecloud.enabled = true
        conda.enabled = false
        docker.enabled = false
        singularity.enabled = false
        podman.enabled = false
        shifter.enabled = false
        apptainer.enabled = false
    }
    apptainer {
        apptainer.enabled = true
        apptainer.autoMounts = true
        conda.enabled = false
        docker.enabled = false
        singularity.enabled = false
        podman.enabled = false
        shifter.enabled = false
        charliecloud.enabled = false
    }
    gitpod {
        executor.name = 'local'
        executor.cpus = 4
        executor.memory = 8.GB
    }
    test { includeConfig 'conf/test.config' }
    test_se { includeConfig 'conf/test_illumina_se.config' }
    test_pacbio { includeConfig 'conf/test_pacbio.config' }
    test_full { includeConfig 'conf/test_full.config' }
}
// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled
// Set to your registry if you have a mirror of containers
apptainer.registry = 'quay.io'
docker.registry = 'quay.io'
podman.registry = 'quay.io'
singularity.registry = 'quay.io'
// Nextflow plugins
plugins {
// pinned version so runs are reproducible across Nextflow installations
id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
}
// Export these variables to prevent local Python/R libraries from conflicting with those in the container
// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
env {
// disables the user's site-packages directory for Python
PYTHONNOUSERSITE = 1
// point R at root-level (effectively empty) profile/environ files so that
// any user-level .Rprofile/.Renviron is not picked up inside the container
R_PROFILE_USER = "/.Rprofile"
R_ENVIRON_USER = "/.Renviron"
JULIA_DEPOT_PATH = "/usr/local/share/julia"
}
// Capture exit codes from upstream processes when piping
process.shell = ['/bin/bash', '-euo', 'pipefail']
// Disable process selector warnings by default. Use debug profile to enable warnings.
nextflow.enable.configProcessNamesValidation = false
// Timestamp shared by all execution reports below; unique filenames mean
// reruns/resumes never collide with an existing report file.
def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
timeline {
enabled = true
file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html"
}
report {
enabled = true
file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html"
}
trace {
enabled = true
file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt"
}
dag {
enabled = true
file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html"
}
// Pipeline metadata shown by `nextflow info` and used for version checks
manifest {
name = 'h3abionet/TADA'
author = """Chris Fields"""
homePage = 'https://github.com/h3abionet/TADA'
description = """Targeted Amplicon Diversity Analysis"""
mainScript = 'main.nf'
// the leading '!' makes a Nextflow version mismatch a hard error
// rather than just a warning
nextflowVersion = '!>=23.04.0'
version = '2.0.0-alpha.1'
doi = ''
}
// Load modules.config for DSL2 module specific options
includeConfig 'conf/modules.config'
// Cap a requested resource at the pipeline-wide maximum.
//
//   obj  : the requested amount — a nextflow.util.MemoryUnit for 'memory',
//          a nextflow.util.Duration for 'time', or an integer for 'cpus'
//   type : one of 'memory', 'time', or 'cpus'
//
// Returns the smaller of the request and params.max_memory / max_time /
// max_cpus. If the configured maximum cannot be parsed, an error is printed
// and the request is returned unchanged. Any other 'type' falls through and
// returns null.
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            // Comparable.compareTo only guarantees the SIGN of its result,
            // not a magnitude of exactly 1 — so test '> 0', not '== 1'
            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) > 0)
                return params.max_memory as nextflow.util.MemoryUnit
            else
                return obj
        } catch (all) {
            println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            if (obj.compareTo(params.max_time as nextflow.util.Duration) > 0)
                return params.max_time as nextflow.util.Duration
            else
                return obj
        } catch (all) {
            println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            // cpus are plain integers, so Math.min suffices
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
}