#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import argparse

import anvio
import anvio.utils as utils
import anvio.terminal as terminal
import anvio.filesnpaths as filesnpaths
import anvio.hmmopswrapper as hmmopswrapper

from anvio.errors import ConfigError, FilesNPathsError


# Module-level metadata. `__version__` mirrors the installed anvi'o version, and
# `__description__` is reused as the argparse description in the script entry point.
__author__ = "Developers of anvi'o (see AUTHORS.txt)"
__copyright__ = "Copyleft 2015-2018, the Meren Lab (http://merenlab.org/)"
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"
__description__ = ("A simple script to generate a TAB-delimited file for the presence or absence "
                   "of HMM hits in a given set of contigs databases and an HMM source.")


# Shared terminal helpers used by main(): `run` reports the output path once the
# matrix is written, `progress` is opened while hits are being tallied.
run = terminal.Run()
progress = terminal.Progress()


def main(args):
    """Write a TAB-delimited matrix of HMM hit counts per genome for one HMM source.

    Rows of the output are the genomes known to the HMM hits wrapper, columns are
    the gene names listed for the requested HMM source, and each cell holds the
    number of hits of that gene in that genome.

    Parameters:
        args: argparse namespace. `hmm_source` and `output_file` are read here; the
            whole namespace is also handed to
            `hmmopswrapper.SequencesForHMMHitsWrapperForMultipleContigs`.

    Raises:
        ConfigError: if no HMM source is declared, if no HMM source is common to
            all contigs databases, if the requested source is not among the common
            ones, or if no output file path is given.
    """
    # a missing attribute and an attribute set to None are treated the same way
    A = vars(args).get
    hmm_source = A('hmm_source')
    output_file_path = A('output_file')

    # NOTE(review): the wrapper is deliberately built with no HMM source *before*
    # `hmm_source` is validated -- presumably so that flags carried in `args`
    # (such as --list-hmm-sources) are handled first. Confirm before reordering.
    s = hmmopswrapper.SequencesForHMMHitsWrapperForMultipleContigs(args, set([]))

    if not hmm_source:
        raise ConfigError("You need to declare an HMM source for this to work :/")

    HMM_sources_common_to_all = s.get_HMM_sources_common_to_all_genomes()
    if not HMM_sources_common_to_all:
        raise ConfigError("There is not a single HMM source that is common to all "
                          "contigs databases you have :/")

    if hmm_source not in HMM_sources_common_to_all:
        raise ConfigError('The HMM source "%s" is not common to all of your contigs '
                          'databases you want to work with :/ Here is a list of those '
                          'that are common: %s' % (hmm_source, ', '.join(HMM_sources_common_to_all)))

    if not output_file_path:
        raise ConfigError("This will not work without an output file path.")

    # fail fast: make sure the output can be written before the (potentially slow)
    # second pass over the contigs databases
    filesnpaths.is_output_file_writable(output_file_path)

    # second pass, this time restricted to the requested source and kept quiet
    s = hmmopswrapper.SequencesForHMMHitsWrapperForMultipleContigs(args, set([hmm_source]), run=terminal.Run(verbose=False))

    gene_names_in_source = [g.strip() for g in s.hmm_hits_info[hmm_source]['genes'].split(',')]

    progress.new('Processing contigs databases')
    try:
        progress.update('...')

        # every genome starts with a zero count for every gene of the source, so
        # genes without any hit still show up in the output
        hit_counts = {genome_name: dict.fromkeys(gene_names_in_source, 0) for genome_name in s.genomes}

        for hit in s.hmm_hits.values():
            genome_name = s.genome_hash_to_genome_name[hit['genome_hash']]
            hit_counts[genome_name][hit['gene_name']] += 1
    finally:
        # always close the progress object, even if tallying fails
        progress.end()

    utils.store_dict_as_TAB_delimited_file(hit_counts, output_file_path, headers=['genome_or_bin'] + sorted(gene_names_in_source))

    run.info('Output', output_file_path)


if __name__ == "__main__":
    # Command-line entry point: declare the arguments, run main(), and turn the
    # anvi'o error types into distinct exit codes.
    parser = argparse.ArgumentParser(description=__description__)

    # Implicit string concatenation is used for the group descriptions so that the
    # source indentation does not end up inside the help text.
    genomes_group = parser.add_argument_group(
        'INPUT: INTERNAL/EXTERNAL GENOMES FILE',
        "Yes. You need to use an internal and/or external genomes file "
        "to tell anvi'o where your contigs databases are.")
    genomes_group.add_argument(*anvio.A('external-genomes'), **anvio.K('external-genomes'))
    genomes_group.add_argument(*anvio.A('internal-genomes'), **anvio.K('internal-genomes'))

    hmm_group = parser.add_argument_group(
        'HMM STUFF',
        "This is where you can specify an HMM source, and/or a list of genes to filter "
        "your results.")
    hmm_group.add_argument(*anvio.A('hmm-source'), **anvio.K('hmm-source'))
    hmm_group.add_argument(*anvio.A('list-hmm-sources'), **anvio.K('list-hmm-sources'))

    output_group = parser.add_argument_group('OUTPUTTAH')
    output_group.add_argument(*anvio.A('output-file'), **anvio.K('output-file', {'required': True }))

    # NOTE(review): parse_known_args() silently ignores unrecognised command-line
    # arguments; kept as-is since callers may rely on it -- confirm it is intended.
    args, unknown = parser.parse_known_args()

    try:
        sys.exit(main(args))
    except ConfigError as e:
        print(e)
        sys.exit(-1)
    except FilesNPathsError as e:
        print(e)
        sys.exit(-2)
