#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 14 06:02:38 2019

@author: alerojo
"""

from mapper import mauve
from summary import extract_main
from gene_predict import prokka, blast
from kmer import kmer, trf, clustermap
from quastunmap import quast
from coverage import cvg_main

import argparse
import os

""" SASpector

SASpector is a tool that compares a short-read assembly with a reference bacterial genome (for example, one obtained via hybrid assembly) by extracting the missing (unmapped) regions from the reference and analyzing them for functional and compositional patterns.
The aim of the analysis is to explain why these regions are missed by the short-read assembly and whether important parts of the genome are lost when a resolved genome is lacking.

The tool takes as global inputs the reference genome and a short-read assembly as contigs/draft genome, both in FASTA format.

"""


def main(argv=None):
    """Run the SASpector pipeline from the command line.

    Parses the command-line arguments, creates the output directory and then
    calls the pipeline steps in order. ``mauve``, ``extract_main`` and
    ``prokka`` always run; ``blast``, the k-mer steps (``kmer``, ``trf``,
    ``clustermap``), ``quast`` and ``cvg_main`` run only when the matching
    option is supplied.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``. ``None`` (the default) reads the process
            command line.

    Raises:
        SystemExit: Raised by ``argparse`` (exit status 2) when the arguments
            are invalid, e.g. ``--outdir`` is missing or an option that
            needs a value is given without one.
    """
    parser = argparse.ArgumentParser(
        prog='SASpector - Short-read Assembly inSpector',
        description='',
    )
    parser.add_argument(
        'reference', type=str, metavar='Reference FASTA file',
        help='Hybrid assembly FASTA file as reference genome',
    )
    parser.add_argument(
        'contigs', type=str, metavar='Contigs FASTA file',
        help='Illumina FASTA file as contigs/draft genome',
    )
    # NOTE(review): the prefix stays optional, so None is forwarded to the
    # pipeline steps when it is omitted - confirm they tolerate that.
    parser.add_argument(
        '-p', '--prefix', metavar='Prefix', type=str, help='Genome ID',
    )
    # Required: without it the directory creation below would receive None
    # and fail with a TypeError instead of a usage message.
    parser.add_argument(
        '-dir', '--outdir', metavar='Output path', required=True,
        help='Output directory',
    )
    # argparse does not pass ``const`` through ``type``, so it has to be an
    # int already. 100 is the value the help text documents for a bare -f.
    parser.add_argument(
        '-f', '--flanking', nargs='?', metavar='Length', const=100,
        type=int, default=0,
        help='Add flanking regions [Default = 100]',
    )
    # The three options below used to accept a bare flag and then stored a
    # placeholder string (the option's own name) as the value. They now
    # require an explicit value so the mistake is reported by argparse.
    parser.add_argument(
        '-db', '--proteindb', metavar='Protein FASTA file', type=str,
        help='BLAST protein database FASTA file',
    )
    parser.add_argument(
        '-k', '--kmers', metavar='k size', type=int, default=0,
        help='Calculate kmer frequencies',
    )
    parser.add_argument(
        '-q', '--quast', action='store_true',
        help='Run QUAST for unmapped regions against reference assembly',
    )
    parser.add_argument(
        '-c', '--coverage', metavar='BAM file', type=str,
        help='Run SAMtools bedcov to look at short-read coverage in the missing regions. Needs alignment of reads to the reference genome in BAM format',
    )

    args = parser.parse_args(argv)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(args.outdir, exist_ok=True)

    mauve(args.reference, args.contigs, args.prefix, args.outdir)
    # Only the mapped and conflict locations are used further down (by the
    # coverage step); the other two results are intentionally ignored.
    mappedlocations, _unmappedlocations, conflictlocations, _reverselocations = extract_main(
        args.reference, args.prefix, args.flanking, args.outdir
    )
    prokka(args.prefix, args.outdir)

    if args.proteindb:
        blast(args.outdir, args.prefix, args.proteindb)
    if args.kmers:  # the default of 0 skips the k-mer analysis
        kmer(args.kmers, args.prefix, args.outdir)
        trf(args.prefix, args.outdir)
        clustermap(args.prefix, args.outdir)
    if args.quast:
        quast(args.reference, args.outdir, args.prefix)
    if args.coverage:
        cvg_main(mappedlocations, conflictlocations, args.coverage, args.reference, args.outdir, args.prefix)

    print('Done!')

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
