# -----------------------------------------------------------------------------
# Copyright (c) 2018 The Regents of the University of California
#
# This file is part of kevlar (http://github.com/dib-lab/kevlar) and is
# licensed under the MIT license: see LICENSE.
# -----------------------------------------------------------------------------
import os


NUMCONTROLS = len(config['controls'])
SAMPLES = ['case'] + ['ctrl{}'.format(i) for i in range(NUMCONTROLS)]

# -----------------------------------------------------------------------------
# Primary target
# -----------------------------------------------------------------------------

rule reads:
    input: expand('Reads/{prefix}.reads.ec.fq.gz', prefix=SAMPLES)
    output: touch('preprocessing.complete')


# -----------------------------------------------------------------------------
# Create standardized named links to input BAM files for internal use.
# -----------------------------------------------------------------------------

rule link_bams:
    input:
        config['case'],
        *config['controls'],
    output:
        'Alignments/case.bam',
        expand('Alignments/ctrl{serial}.bam', serial=range(NUMCONTROLS)),
    threads: 1
    message: 'Create symlinks to input BAM files.'
    run:
        for inbam, outbam in zip(input, output):
            os.symlink(os.path.abspath(inbam), outbam)


# -----------------------------------------------------------------------------
# Convert reads to unpaired Fastq and compress.
#
# $ samtools fastq <args> | bgzip <args> > seqs.fq.gz
# -----------------------------------------------------------------------------

rule bam_to_fastq:
    input: 'Alignments/{indiv}.bam'
    output:
        pipe('Reads/{indiv}.reads.fq'),
        'Logs/{indiv}-fastq.log'
    threads: 36
    message: 'Convert read alignments to unpaired Fastq format.'
    shell: 'samtools fastq -N -F 2304 -@ {threads} {input} > {output[0]} 2> >(tee {output[1]})'

rule compress_unpaired_fastq:
    input: 'Reads/{indiv}.reads.fq'
    output: 'Reads/{indiv}.reads.fq.gz'
    threads: 36
    message: 'Compress unpaired reads.'
    shell: 'bgzip -@ {threads} -c {input} > {output}'


# -----------------------------------------------------------------------------
# Perform quality control on reads.
# -----------------------------------------------------------------------------

rule qc:
    input: 'Reads/{prefix}.reads.fq.gz'
    output:
        pipe('Reads/{prefix}.reads.qc.fq'),
        'Logs/{prefix}-fastp.json',
        'Logs/{prefix}-fastp.html',
        'Logs/{prefix}-qc.log'
    threads: 16
    message: 'Perform quality control on reads.'
    shell: 'fastp -i {input} --interleaved_in --stdout -p --thread {threads} --json {output[1]} --html {output[2]} -q 15 -u 40 -l 15 > {output[0]} 2> >(tee {output[3]})'

rule compress_qc_reads:
    input: 'Reads/{prefix}.reads.qc.fq'
    output: 'Reads/{prefix}.reads.qc.fq.gz'
    threads: 56
    message: 'Compress quality controlled reads.'
    shell: 'bgzip -@ {threads} -c {input} > {output}'


# -----------------------------------------------------------------------------
# Perform error correction on reads
# -----------------------------------------------------------------------------

rule trusted_kmers:
    input: expand('Reads/{prefix}.reads.qc.fq.gz', prefix=SAMPLES)
    output:
        'lighter.trustedkmers',
        'Logs/trustedkmers.log'
    threads: 72
    message: 'Compute trusted k-mers across all samples for error correction.'
    shell: 'lighter -K 27 3100000000 -r {input[0]} -r {input[1]} -r {input[2]} -saveTrustedKmers {output[0]} -t {threads} 2>&1 | tee {output[1]}'

rule ec:
    input:
        'Reads/{prefix}.reads.qc.fq.gz',
        'lighter.trustedkmers'
    output:
        'Reads/{prefix}.reads.ec.fq.gz',
        'Logs/{prefix}-ec.log'
    threads: 72
    message: 'Perform error correction sample by sample.'
    shell: 'lighter -K 27 3100000000 -r {input[0]} -loadTrustedKmers {input[1]} -t {threads} 2>&1 | tee {output[1]} && mv {wildcards.prefix}.qc.reads.cor.fq.gz {output[0]}'
