#!/usr/bin/env python
# -*- coding: utf-8

import os
import sys

import anvio
import anvio.tables as t
import anvio.utils as utils
import anvio.terminal as terminal
import anvio.constants as constants
import anvio.clustering as clustering
import anvio.interactive as interactive
import anvio.filesnpaths as filesnpaths

from anvio.errors import ConfigError, FilesNPathsError


__author__ = "Developers of anvi'o (see AUTHORS.txt)"
__copyright__ = "Copyleft 2015-2018, the Meren Lab (http://merenlab.org/)"
__credits__ = ["Tom O. Delmont"]
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"


# Module-level terminal helpers shared by the rest of this script: `run` and
# `progress` are the default reporters handed to AdHocRunGenerator, and `pp`
# is applied to the counts that main() reports.
run = terminal.Run()
progress = terminal.Progress()
pp = terminal.pretty_print


class AdHocRunGenerator:
    """From a matrix file to full-blown anvi'o interface.

       This is a class to take in a view data matrix at minimum, and create all
       necessary files for an anvi'o interactive interface call in manual mode.

       Parameters
       ==========
       output_directory : str
           Directory to create; every output file is written inside it.
       view_data : dict
           Item name -> {sample name: value} matrix, written to 'view.txt' and
           used for clustering.
       additional_view_data : dict
           Per-item additional data stored in the profile database.
       samples : list
           Sample names; used as the column headers of the view data file.
       skip_clustering_view_data : bool
           When True, no items tree ('tree.txt') is computed or written.
       run, progress
           Terminal reporting objects (default to the module-level instances).
       distance, linkage : str or None
           Clustering parameters; anvi'o defaults are used when not provided.

       The bool member 'delete_output_directory_if_exists' can be set to True
       after construction to allow overwriting an existing output directory."""

    def __init__(self, output_directory, view_data, additional_view_data, samples, skip_clustering_view_data=False, run=run, progress=progress, distance=None, linkage=None):
        self.run = run
        self.progress = progress

        self.view_data = view_data
        self.additional_view_data = additional_view_data
        self.samples = samples

        self.skip_clustering_view_data = skip_clustering_view_data
        self.delete_output_directory_if_exists = False

        # for clustering: each parameter falls back to its OWN default (the
        # linkage must not fall back to the default distance metric).
        self.distance = distance or constants.distance_metric_default
        self.linkage = linkage or constants.linkage_method_default

        self.output_directory = output_directory


    def sanity_check(self):
        """Validate clustering parameters and create the output directory.

           Raises ConfigError if the output directory already exists and
           'delete_output_directory_if_exists' is not set."""

        clustering.is_distance_and_linkage_compatible(self.distance, self.linkage)

        if os.path.exists(self.output_directory) and not self.delete_output_directory_if_exists:
            raise ConfigError("AdHocRunGenerator will not work with an existing directory. Please provide a new\
                                path, or use the bool member 'delete_output_directory_if_exists' to overwrite\
                                any existing directory.")

        filesnpaths.gen_output_directory(self.output_directory, delete_if_exists=self.delete_output_directory_if_exists)


    def get_output_file_path(self, file_name):
        """Return the path of `file_name` inside the output directory."""

        return os.path.join(self.output_directory, file_name)


    def generate(self):
        """Write the view data, the items tree (unless skipped) and the profile
           database, then report the command to visualize them."""

        self.sanity_check()

        # write view data
        view_data_path = self.get_output_file_path('view.txt')
        self.run.info("View data file", view_data_path)
        utils.store_dict_as_TAB_delimited_file(self.view_data, view_data_path, headers=['contig'] + self.samples)

        # generate newick and write to file. `tree_path` stays None when the
        # clustering is skipped, so the code below never reads an unbound name.
        tree_path = None
        if not self.skip_clustering_view_data:
            tree_path = self.get_output_file_path('tree.txt')
            newick = clustering.get_newick_tree_data_for_dict(self.view_data, distance=self.distance, linkage=self.linkage)
            self.run.info("Tree file", tree_path)

            with open(tree_path, 'w') as f:
                f.write(newick)

        # create new profile.db and populate additional data
        profile_db_path = self.get_output_file_path('profile.db')
        self.run.info('Profile database', profile_db_path)

        # a bare function object serves as a lightweight attribute container
        # standing in for parsed command line arguments
        args = lambda: None
        args.profile_db = profile_db_path
        args.manual_mode = True
        args.dry_run = True
        args.view_data = view_data_path
        args.tree = tree_path
        interactive.Interactive(args)

        self.populate_additional_data(profile_db_path)

        # only advertise `--tree` if a tree file was actually written
        if tree_path:
            command = "anvi-interactive --manual -p %s --tree %s --view-data %s" % (profile_db_path, tree_path, view_data_path)
        else:
            command = "anvi-interactive --manual -p %s --view-data %s" % (profile_db_path, view_data_path)

        self.run.info_single("Good news, your data is ready.", nl_before=1, mc='green')
        self.run.info_single("Please run '%s'" % command, cut_after=200, nl_after=1, mc='green')


    def populate_additional_data(self, profile_db_path):
        """Store the additional item data and a default layer order in the
           profile database found at `profile_db_path`."""

        args = lambda: None
        args.profile_db = profile_db_path

        # NOTE(review): the data keys are hard-coded here, so callers must
        # provide exactly these keys in `additional_view_data`.
        table = t.miscdata.TableForItemAdditionalData(args)
        table.add(self.additional_view_data, ['Competing NTs', 'Position in codon', 'Gene callers ID'], skip_check_names=True)

        # cluster the transposed matrix to get an order for the layers
        table = t.miscdata.TableForLayerOrders(args)
        layer_newick = clustering.get_newick_tree_data_for_dict(self.view_data, transpose=True, distance=self.distance, linkage=self.linkage)
        table.add({'default': {'data_type': 'newick', 'data_value': layer_newick}})



def main(args):
    """Turn an anvi-gen-variability-profile output into files for the interactive interface.

       Reads the TAB-delimited file at `args.profile`, builds a matrix of
       departure from consensus values (one row per nucleotide position, one
       column per sample) together with per-position additional data, and
       hands both to AdHocRunGenerator to write into `args.output_dir`.

       Values outside the open interval (`args.min_departure_from_consensus`,
       `args.max_departure_from_consensus`) are left as 0.0 in the matrix.

       Raises ConfigError if the input has no entries or is missing an
       expected field."""

    filesnpaths.check_output_directory(args.output_dir)
    profile = utils.get_TAB_delimited_file_as_dictionary(args.profile)
    run.info('Num entries', pp(len(profile)))

    entries = list(profile.values())

    # an input without any data rows would otherwise crash with an IndexError
    # when the first entry is inspected below
    if not entries:
        raise ConfigError("The input file does not seem to contain any entries. There is nothing to work with :/")

    first_entry = entries[0]
    for field in t.variable_nts_table_structure[1:]:
        if field != 'split_name' and field not in first_entry:
            raise ConfigError("The input file does not look like it is generated by anvi-gen-variability-profile.\
                                It is missing at least one field that should have appeared in this file (%s)" % field)

    nt_positions = {e['unique_pos_identifier'] for e in entries}
    run.info('Num positions', pp(len(nt_positions)))

    samples = sorted({e['sample_id'] for e in entries})
    run.info('Num samples', pp(len(samples)))

    data_dict = {}
    additional_data_dict = {}
    for entry in entries:
        unique_pos_identifier = 'p_%s' % (str(entry['unique_pos_identifier']))
        sample = entry['sample_id']
        departure_from_consensus = float(entry['departure_from_consensus'])

        if unique_pos_identifier not in data_dict:
            # every position starts with 0.0 for all samples
            data_dict[unique_pos_identifier] = dict.fromkeys(samples, 0.0)
            additional_data_dict[unique_pos_identifier] = {'Competing NTs': None, 'Position in codon': None}

        if departure_from_consensus > 0.01:
            additional_data_dict[unique_pos_identifier]['Competing NTs'] = entry['competing_nts']

        if args.min_departure_from_consensus < departure_from_consensus < args.max_departure_from_consensus:
            data_dict[unique_pos_identifier][sample] = departure_from_consensus

        # turn the codon position into an ordinal to make it categorical
        additional_data_dict[unique_pos_identifier]['Position in codon'] = utils.get_ordinal_from_integer(int(entry['base_pos_in_codon']))
        additional_data_dict[unique_pos_identifier]['Gene callers ID'] = int(entry['corresponding_gene_call'])

    g = AdHocRunGenerator(args.output_dir, data_dict, additional_data_dict, samples, linkage='ward')
    g.generate()


if __name__ == '__main__':
    import argparse

    # describe the command line interface of the script
    parser = argparse.ArgumentParser(
        description='Take the output of anvi-gen-variability-profile, prepare an output for interactive interface')

    parser.add_argument('profile', help='The output file generated by anvi-gen-variability-profile')
    parser.add_argument('--min-departure-from-consensus', type=float, default=0.00, metavar="FLOAT",
                        help="Minimum departure from consensus at a given variable nucleotide position. The default\
                        is %(default).2f.")
    parser.add_argument('--max-departure-from-consensus', type=float, default=0.99, metavar="FLOAT",
                        help="Maximum departure from consensus at a given variable nucleotide position. The default\
                        is %(default).2f.")
    parser.add_argument(*anvio.A('output-dir'), **anvio.K('output-dir', {'required': True}))

    args = anvio.get_args(parser)

    # both anvi'o error types are reported the same way: print the message
    # and terminate with a non-zero exit status
    try:
        main(args)
    except (ConfigError, FilesNPathsError) as error:
        print(error)
        sys.exit(-1)
