#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Export aligned sequences from anvi'o pan genomes"""

import os
import sys
import argparse

import anvio
import anvio.dbops as dbops
import anvio.utils as utils
import anvio.terminal as terminal
import anvio.filesnpaths as filesnpaths
import anvio.summarizer as summarizer

from anvio.errors import ConfigError, FilesNPathsError, DictIOError


__author__ = "Developers of anvi'o (see AUTHORS.txt)"
__copyright__ = "Copyleft 2015-2018, the Meren Lab (http://merenlab.org/)"
__credits__ = ["Ryan Bartelme", "Jay Osvatic"]
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"


run = terminal.Run()
progress = terminal.Progress()


def main(args):
    """Write the sequences of selected gene clusters in a pan database to a file.

    The gene clusters to report are chosen, in order of precedence, from:

      * a collection and bin (`args.collection_name` + `args.bin_id`),
      * a single gene cluster (`args.gene_cluster_id`),
      * a single-column file of gene cluster ids (`args.gene_cluster_ids_file`),
      * nothing at all, in which case an empty set of ids is handed to
        `init_gene_clusters` (reported to the user as 'all gene clusters').

    The selection is then passed through `pan.filter_gene_clusters_dict(args)`
    and written either as concatenated gene clusters
    (`args.concatenate_gene_clusters`) or as plain per-cluster sequences.

    Parameters
    ==========
    args : argparse.Namespace
        Parsed command line arguments. Besides the attributes named above this
        function reads `pan_db`, `output_file`, `align_with`, `separator`,
        `report_DNA_sequences`, `list_collections` and `list_bins`. Note that
        `args.output_file` is filled in with a default path (next to the pan
        database) when the user did not provide one.

    Raises
    ======
    ConfigError
        If mutually exclusive selection parameters are combined, if an aligner
        is requested without concatenation, if a collection is used without a
        bin id, or if the gene cluster ids file has more than one column.
    """

    if args.gene_cluster_id and args.gene_cluster_ids_file:
        raise ConfigError('You should either declare a single gene cluster name, or gene cluster names in a file')

    if (args.gene_cluster_id or args.gene_cluster_ids_file) and args.collection_name:
        raise ConfigError('You can either declare specific list of gene clusters to work with (through `--gene-cluster-id` or `--gene-cluster-ids-file`) or\
                           go the collection way using parameters `--collection-name` and `--bin-name`. Those are not to be\
                           mixed. If you need to know what collections are available in the pan database, use the flag \
                           `--list-collections`.')

    # no output file name given: fall back to a default one in the directory of the pan database
    if not args.output_file:
        args.output_file = os.path.join(os.path.dirname(os.path.abspath(args.pan_db)), 'gene-cluster-alignments-output-with-an-ugly-name.fa')

    if not args.concatenate_gene_clusters and args.align_with:
        raise ConfigError("If you are not asking for concatenated gene clusters, then you really don't need to specify an aligner.\
                           Yes, anvi'o could ignore that flag and move on, but who would we become if we get used to ignoring those\
                           little signs of 'I am not sure what I am doing'?")

    filesnpaths.is_output_file_writable(args.output_file)

    # an empty set means no explicit selection was made
    gene_cluster_ids = set()

    if args.collection_name or args.list_collections or args.list_bins:
        # the summarizer is created with silent run/progress objects; only our own
        # 'Initializing' progress message is shown while it loads
        progress.new('Initializing')
        progress.update('...')
        pan = summarizer.PanSummarizer(args, r=terminal.Run(verbose=False), p=terminal.Progress(verbose=False))
        progress.end()

        if not args.bin_id:
            raise ConfigError("When you use a collection name, you must also declare a bin id :/ You don't know what bin you want? Use the flag\
                               `--list-bins`.")

        pan.collections.is_bin_in_collection(collection_name=args.collection_name, bin_name=args.bin_id)
        collection_dict = pan.collections.get_collection_dict(args.collection_name)
        gene_cluster_ids = set(collection_dict[args.bin_id])

        run.info('Mode', 'Reporting alignments for a gene clusters from in the collection %s and bin %s.' % (args.collection_name, args.bin_id))
    elif args.gene_cluster_id:
        gene_cluster_ids = {args.gene_cluster_id}
        run.info('Mode', 'Reporting alignments for a single gene cluster.')
    elif args.gene_cluster_ids_file:
        columns = utils.get_columns_of_TAB_delim_file(args.gene_cluster_ids_file, include_first_column=True)
        if len(columns) != 1:
            raise ConfigError("The input file for gene cluster IDs must contain a single column. It seems yours has %d :/" % len(columns))

        # default text mode already does universal-newline translation; the old 'rU'
        # mode is rejected with a ValueError by Python 3.11+. the context manager makes
        # sure the file handle is closed once the ids are read.
        with open(args.gene_cluster_ids_file) as gene_cluster_ids_file:
            gene_cluster_ids = {line.strip('\n') for line in gene_cluster_ids_file}

        run.info('Mode', 'Reporting alignments for a list of gene clusters from an input file.')
    else:
        run.info('Mode', 'Reporting alignments for all gene clusters.')

    pan = dbops.PanSuperclass(args)
    pan.init_gene_clusters(gene_cluster_ids)

    # this will simply return pan.gene_clusters if there are no filters requested
    filtered_gene_clusters_dict = pan.filter_gene_clusters_dict(args)

    if args.concatenate_gene_clusters:
        pan.write_sequences_in_gene_clusters_for_phylogenomics(gene_clusters_dict=filtered_gene_clusters_dict,
                                                               output_file_path=args.output_file,
                                                               report_DNA_sequences=args.report_DNA_sequences,
                                                               align_with=args.align_with,
                                                               separator=args.separator)
    else:
        pan.write_sequences_in_gene_clusters_to_file(gene_clusters_dict=filtered_gene_clusters_dict,
                                                     output_file_path=args.output_file,
                                                     report_DNA_sequences=args.report_DNA_sequences)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Do cool stuff with gene clusters in anvi'o pan genomes")

    def add_group(title, description, *param_specs):
        """Create an argument group and register anvi'o parameters in it, in order.

        Each item of `param_specs` is either an anvi'o parameter name, or a
        `(name, overrides)` tuple where `overrides` is passed to `anvio.K` as
        its second argument.
        """
        group = parser.add_argument_group(title, description)

        for spec in param_specs:
            if isinstance(spec, tuple):
                param_name, overrides = spec
                keywords = anvio.K(param_name, overrides)
            else:
                param_name = spec
                keywords = anvio.K(param_name)

            group.add_argument(*anvio.A(param_name), **keywords)

        return group

    add_group('INPUT FILES', "Input files from the pangenome analysis.",
              'pan-db',
              ('genomes-storage', {'required': False}))

    add_group('OUTPUT', "You get to chose an output file name to report things. The default will be\
                                       an ugly name. So, be explicit.",
              'output-file',
              'report-DNA-sequences')

    add_group('SELECTION', "Which gene clusters should be reported. You can ask for a single gene cluster,\
                                       or multiple ones listed in a file, or you can use a collection and bin name to list gene clusters\
                                       of interest. If you give nothing, this program will export alignments for every single gene cluster\
                                       found in the profile database (and this is called 'customer service').",
              'gene-cluster-id',
              'gene-cluster-ids-file',
              'collection-name',
              'bin-id')

    add_group('ADVANCED FILTERS', "If you are here you must be looking for ways to specify\
                                        exactly what you want from that database of gene clusters. These filters will\
                                        be applied to what your previous selections reported.",
              'min-num-genomes-gene-cluster-occurs',
              'max-num-genomes-gene-cluster-occurs',
              'min-num-genes-from-each-genome',
              'max-num-genes-from-each-genome',
              'max-num-gene-clusters-missing-from-genome',
              'add-into-items-additional-data-table')

    add_group('OTHER STUFF', "Yes. Stuff that are not like the ones above.",
              'list-collections',
              'list-bins')

    add_group('CONCATENATED OUTPUT', "Concatenated output for phylogenomics.",
              'concatenate-gene-clusters',
              'separator',
              'align-with',
              'list-aligners')

    add_group('TOTALLY IRRELEVANT', "Just in case you need it.",
              'just-do-it')

    args = anvio.get_args(parser)

    # known anvi'o errors are printed and turned into distinct exit codes; the
    # isinstance checks run in the same order as separate except clauses would
    try:
        main(args)
    except (ConfigError, FilesNPathsError, DictIOError) as error:
        print(error)

        if isinstance(error, ConfigError):
            exit_code = -1
        elif isinstance(error, FilesNPathsError):
            exit_code = -2
        else:
            exit_code = -3

        sys.exit(exit_code)
