#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import copy
import operator

import anvio
import anvio.dbops as dbops
import anvio.utils as utils
import anvio.terminal as terminal
import anvio.filesnpaths as filesnpaths
import anvio.ccollections as ccollections

from anvio.completeness import Completeness
from anvio.errors import ConfigError, FilesNPathsError
from anvio.tables.collections import TablesForCollections


# Module-level metadata. The version string is taken from the imported
# `anvio` package rather than being maintained separately in this file.
__author__ = "Developers of anvi'o (see AUTHORS.txt)"
__copyright__ = "Copyleft 2015-2018, the Meren Lab (http://merenlab.org/)"
__credits__ = []
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"


# Shared terminal helpers: `run` is used by main() for warnings and info
# lines, `progress` for the per-bin progress display.
run = terminal.Run()
progress = terminal.Progress()
# NOTE(review): `pp` is not referenced anywhere in the code shown here.
pp = terminal.pretty_print


def main(args):
    """Rename every bin of a collection and store the result as a new collection.

    Bins are ranked by 'substantive completion' (percent completion minus
    percent redundancy), using, for each bin, the single-copy core gene
    collection that yields the highest completion estimate. Bins that qualify
    as MAGs (only when `call_MAGs` is set) are numbered first, followed by the
    remaining bins. New names have the form `<prefix>_<MAG|Bin>_<NNNNN>`.

    A tab-delimited report mapping old names to new names is always written.
    Unless `dry_run` is set, the renamed bins are then stored in the profile
    database under `collection_to_write`.

    Parameters
    ==========
    args : argparse.Namespace (or any object with a compatible `__dict__`)
        Attributes read: profile_db, contigs_db, list_collections,
        collection_to_read, collection_to_write, prefix, report_file,
        call_MAGs, min_completion_for_MAG, max_redundancy_for_MAG,
        size_for_MAG, dry_run. Missing attributes are treated as None.

    Raises
    ======
    ConfigError
        If a required parameter is missing or invalid, if the collection
        names are unusable, or if the contigs database has no HMM sources.

    Notes
    =====
    Calls `sys.exit()` after listing collections when `list_collections`
    is set.
    """

    # attributes that are absent from `args` resolve to None
    A = args.__dict__.get
    profile_db_path = A('profile_db')
    contigs_db_path = A('contigs_db')
    list_collections = A('list_collections')
    collection_name_to_read = A('collection_to_read')
    collection_name_to_write = A('collection_to_write')
    prefix = A('prefix')
    report_file_path = A('report_file')
    call_MAGs = A('call_MAGs')
    min_completion_for_MAG = A('min_completion_for_MAG')
    max_redundancy_for_MAG = A('max_redundancy_for_MAG')
    size_for_MAG = A('size_for_MAG')
    dry_run = A('dry_run')

    utils.is_profile_db_and_contigs_db_compatible(profile_db_path, contigs_db_path)

    collections = ccollections.Collections()
    collections.populate_collections_dict(profile_db_path)

    if list_collections:
        collections.list_collections()
        sys.exit()

    if not prefix:
        raise ConfigError("Anvi'o is having hard time believing that you called this function without\
                            a prefix to rename bins in the collection '%s'." % collection_name_to_read)

    utils.is_this_name_OK_for_database('prefix for bins', prefix)

    if not report_file_path:
        raise ConfigError("You must provide an output file name to report file changes. It may or may not\
                            be useful to you, but let's don't take unnecessary risks, eh? (you can use the\
                            `--report-file` parameter)")

    filesnpaths.is_output_file_writable(report_file_path)

    # missing names are checked before the identity test: otherwise omitting both
    # names (None == None) would trigger the misleading 'identical names' error.
    if not collection_name_to_read:
        raise ConfigError("You must provide a collection name to read from.")

    if not collection_name_to_write:
        raise ConfigError("You must provide a new collection name to write updated bin names.")

    if collection_name_to_read == collection_name_to_write:
        raise ConfigError("Well :( Collection name to read from can't be identical to the collection name\
                            you want to use to store updated bin names. You know. It kinda defeats the purpose.")

    utils.is_this_name_OK_for_database('collection name two write', collection_name_to_write)

    if collection_name_to_read not in collections.collections_dict:
        raise ConfigError("%s is not a valid collection name, because it doesn't exist :(. See a\
                            list of available ones with '--list-collections' flag" % collection_name_to_read)

    if collection_name_to_write in collections.collections_dict:
        raise ConfigError("The new collection name %s is already in the database. You must choose a new\
                            collection name." % collection_name_to_write)

    completeness = Completeness(contigs_db_path)

    if not len(completeness.sources):
        raise ConfigError("HMM's were not run for this contigs database :/ Without that, how can this poor program can rename bins based on\
                            their completion estimates? :(")

    run.warning('This run will use the completion and redundancy estimate\
                     recovered from the single-copy core gene collection that provides the highest\
                     completion estimate.')

    contigs_db = dbops.ContigsSuperclass(args, r=run, p=progress)

    collection_dict = collections.get_collection_dict(collection_name_to_read)
    bins_info_dict = collections.get_bins_info_dict(collection_name_to_read)

    if call_MAGs:
        warning_msg = "Please read carefully. This program is set to call any bin a 'MAG', if the bin is estimated to be\
                       less than %.0f%% redundant and more than %.0f%% complete. " % (max_redundancy_for_MAG, min_completion_for_MAG)

        if size_for_MAG:
            warning_msg += "But since you set a value for MAG size, any bin that meets the redundancy criterion and larger\
                            than %.2f Mbp in size, WILL ALSO be called a MAG regardless of its completion estimate. This is\
                            a way to not miss refined bins with low completion estimates but large sizes. If this is not what\
                            you want, set `--size-for-MAG` to 0." % (size_for_MAG)
        else:
            warning_msg += "This process will not pay attention to the bin size (Mbp). If you think it should, please see the\
                            flag `--size-for-MAG` in the help menu."

        run.warning(warning_msg, "A FRIENDLY REMINDER", lc='green')

    # each entry: (old name, substantive completion, completion, redundancy, size in Mbp, 'MAG' or 'Bin')
    MAGs_sorted_by_completion = []
    bins_sorted_by_completion = []
    total_num_bins = len(collection_dict)
    progress.new('Going through bins in the collection %s' % collection_name_to_read)
    for counter, bin_name in enumerate(collection_dict, start=1):
        progress.update('%d in %d' % (counter, total_num_bins))
        split_names = set(collection_dict[bin_name])

        # only the last item (estimates per domain, per SCG collection) is used;
        # the summary values returned alongside it are ignored in favor of the
        # best-scoring SCG collection chosen below.
        _, _, _, _, scg_estimates = completeness.get_info_for_splits(split_names)

        candidates = [(scg_estimates[domain_name][scg_collection]['percent_completion'], domain_name, scg_collection)
                      for domain_name in scg_estimates
                      for scg_collection in scg_estimates[domain_name]]

        if not candidates:
            # defensive: without any estimate there is nothing to rank this bin by
            progress.end()
            raise ConfigError("Anvi'o could not find any single-copy core gene collection estimates for the bin '%s', "
                              "so it can't rank it by completion :/" % bin_name)

        # highest completion wins; ties are broken by domain and SCG collection name
        _, best_domain, best_scg_collection = max(candidates)

        p_completion = scg_estimates[best_domain][best_scg_collection]['percent_completion']
        p_redundancy = scg_estimates[best_domain][best_scg_collection]['percent_redundancy']

        size_in_Mbp = sum(contigs_db.splits_basic_info[split_name]['length'] for split_name in split_names) / 1000000.0
        substantive_completion = p_completion - p_redundancy

        # a MAG must pass the redundancy criterion, and then either be complete
        # enough or (when a size threshold is set) be large enough.
        is_MAG = bool(call_MAGs
                      and p_redundancy < max_redundancy_for_MAG
                      and (p_completion >= min_completion_for_MAG
                           or (size_for_MAG and size_in_Mbp >= size_for_MAG)))

        if is_MAG:
            MAGs_sorted_by_completion.append((bin_name, substantive_completion, p_completion, p_redundancy, size_in_Mbp, 'MAG'))
        else:
            bins_sorted_by_completion.append((bin_name, substantive_completion, p_completion, p_redundancy, size_in_Mbp, 'Bin'))

    MAGs_sorted_by_completion.sort(key=operator.itemgetter(1), reverse=True)
    bins_sorted_by_completion.sort(key=operator.itemgetter(1), reverse=True)
    progress.end()

    new_collection_dict = {}
    new_bins_info_dict = {}

    # MAGs get the lowest numbers, regular bins continue the same numbering
    bins_in_new_order = MAGs_sorted_by_completion + bins_sorted_by_completion

    # the context manager guarantees the report is closed even if a write fails
    with open(report_file_path, 'w') as report:
        report.write('old_bin_name\tnew_bin_name\tcompletion\tredundancy\tsize_in_Mbp\n')
        for counter, (bin_name, _, completion, redundancy, size_in_Mbp, bin_type) in enumerate(bins_in_new_order, start=1):
            new_bin_name = '%s_%s_%05d' % (prefix, bin_type, counter)
            new_collection_dict[new_bin_name] = copy.deepcopy(collection_dict[bin_name])

            if bin_name in bins_info_dict:
                new_bins_info_dict[new_bin_name] = copy.deepcopy(bins_info_dict[bin_name])

            report.write('%s\t%s\t%.2f\t%.2f\t%.2f\n' % (bin_name, new_bin_name, completion, redundancy, size_in_Mbp))

    run.info('Report', '%s' % (report_file_path))

    if dry_run:
        run.warning('This was a dry run. So nothing is updated in the profile database, but now there is\
                     a fancy report file that shows how your bins would have been renamed. Please take a\
                     look at it and make sure things seem just like the way you want. Once you are satisfied\
                     you can run the program without the --dry-run flag.')

        return

    collections_table = TablesForCollections(profile_db_path)
    collections_table.append(collection_name_to_write, new_collection_dict, new_bins_info_dict)

    run.info('Rename', 'All %d bins in the collection "%s" is renamed, and stored under the collection\
                        name "%s".' % (len(bins_in_new_order), collection_name_to_read, collection_name_to_write))

if __name__ == '__main__':
    import argparse

    def percentage(value):
        """argparse `type` callable: parse `value` as a float within [0, 100].

        The percentage flags used to combine `type=float` with
        `choices=list(range(0, 101))`, which rejected any fractional value
        (e.g. 70.5) and listed all 101 choices in the error message. A range
        check keeps every previously accepted value valid while also
        accepting fractions.
        """
        try:
            number = float(value)
        except ValueError:
            raise argparse.ArgumentTypeError("'%s' is not a number." % value)

        # this comparison is also False for NaN, so NaN is rejected as well
        if not 0 <= number <= 100:
            raise argparse.ArgumentTypeError("'%s' is not a value between 0 and 100." % value)

        return number

    parser = argparse.ArgumentParser(description='Rename all bins in a given collection (so they have pretty names).')

    groupA = parser.add_argument_group('DEFAULT INPUTS', "Standard stuff")
    groupB = parser.add_argument_group('OUTPUT AND TESTING', "a.k.a, sweet parameters of convenience")
    groupC = parser.add_argument_group('MAG OPTIONS', "If you want to call some bins 'MAGs' because you are so cool")
    groupA.add_argument(*anvio.A('contigs-db'), **anvio.K('contigs-db'))
    groupA.add_argument(*anvio.A('profile-db'), **anvio.K('profile-db'))
    groupA.add_argument('--collection-to-read', default = None,
                  help = "Collection name to read from. Anvi'o will not overwrite an existing collection, instead,\
                          it will create a copy of your collection with new bin names.")
    groupA.add_argument('--collection-to-write', default = None,
                  help = "The new collection name. Give it a nice, fancy name.")

    groupB.add_argument('--prefix', default = None,
                  help = "Prefix for the bin names. Must be a single word, composed\
                          of digits and numbers. The use of the underscore character is OK,\
                          but that's about it (fine, the use of the dash character is OK, too\
                          but no more!). If the prefix is 'PREFIX', each bin will be renamed\
                          as 'PREFIX_XXX_00001, PREFIX_XXX_00002', and so on, in the order of\
                          percent completion minus percent redundancy (what we call, 'substantive\
                          completion'). The 'XXX' part will either be 'Bin', or 'MAG depending on\
                          other parameters you use. Keep reading.")
    groupB.add_argument('--report-file', metavar = 'REPORT_FILE_PATH', default = None,
                        help = "This file will report each name change event, so you can trace back\
                                the original names of renamed bins later.")
    groupB.add_argument(*anvio.A('list-collections'), **anvio.K('list-collections'))
    groupB.add_argument(*anvio.A('dry-run'), **anvio.K('dry-run', {'help': 'When used does NOT update the\
                                profile database, just creates the report file so you can view how things\
                                will be renamed.'}))

    groupC.add_argument('--call-MAGs', default=False, action="store_true",
                        help = "This program by default rename your bins as 'PREFIX_Bin_00001', 'PREFIX_Bin_00002'\
                                and so on. If you use this flag, it will name the ones that meet the criteria\
                                described by MAG-related flags as 'PREFIX_MAG_00001', 'PREFIX_MAG_00002', and\
                                so on. The ones that do not get to be named as MAGs will remain as bins.")
    groupC.add_argument('--min-completion-for-MAG', default=70, type=percentage, metavar='[0-100]',
                        help="If --call-MAGs flag is used, call any bin a 'MAG' if their completion estimate is\
                              above this (the default is %(default)d), and the redundancy estimate is\
                              less than --max-redundancy-for-MAG.")
    groupC.add_argument('--max-redundancy-for-MAG', default=10, type=percentage, metavar='[0-100]',
                        help="If --call-MAGs flag is used, call any bin a 'MAG' if their redundancy estimate is\
                              below this (the default is %(default)d) and the completion estimate is above\
                              --min-completion-for-MAG.")
    groupC.add_argument('--size-for-MAG', default=0.0, type=float, metavar='MEGABASEPAIRS',
                        help="If --call-MAGs flag is used, call any bin a 'MAG' if their redundancy estimate\
                              is less than --max-redundancy-for-MAG, AND THEIR SIZE IS LARGER THAN THIS VALUE\
                              REGARDLESS OF THE COMPLETION ESTIMATE. The default behavior is to not care about\
                              this at all.")
    args = anvio.get_args(parser)

    # both error types are reported the same way: print the message, exit non-zero
    try:
        main(args)
    except (ConfigError, FilesNPathsError) as e:
        print(e)
        sys.exit(-1)
