#!/usr/bin/env python
# -*- coding: utf-8

import sys
import copy
import operator

import anvio
import anvio.dbops as dbops
import anvio.utils as utils
import anvio.terminal as terminal
import anvio.filesnpaths as filesnpaths
import anvio.ccollections as ccollections

from anvio.completeness import Completeness
from anvio.errors import ConfigError, FilesNPathsError


# Module metadata. `__version__` mirrors the version of the imported `anvio` package.
__author__ = "A. Murat Eren"
__copyright__ = "Copyright 2016, The anvio Project"
__credits__ = []
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"


# Module-level terminal helpers shared by the code below: `run` is used for
# warnings / info lines and `progress` for the per-bin progress display in main().
# `pp` is an alias for terminal.pretty_print (not referenced in the code visible here).
run = terminal.Run()
progress = terminal.Progress()
pp = terminal.pretty_print


def main(args):
    """Rename all bins of a collection and store them under a new collection name.

    Bins are ranked by 'substantive completion' (percent completion minus percent
    redundancy), highest first, and renamed `<prefix>_Bin_<NNNNN>`. When `call_MAGs`
    is set, bins whose redundancy is below `max_redundancy_for_MAG` and whose
    completion is at least `min_completion_for_MAG` OR whose size (in Mbp) is at
    least `size_for_MAG` are ranked before all other bins and named
    `<prefix>_MAG_<NNNNN>`. The running number is shared between MAGs and bins.

    A tab-delimited report (old name -> new name) is always written to
    `report_file`. Unless `dry_run` is set, the renamed collection is then appended
    to the profile database under `collection_to_write`.

    Parameters
    ==========
    args : argparse.Namespace-like object
        Attributes are read from `args.__dict__`; any missing attribute is treated
        as None. The attributes read here are: profile_db, contigs_db,
        list_collections, collection_to_read, collection_to_write, prefix,
        report_file, call_MAGs, min_completion_for_MAG, max_redundancy_for_MAG,
        size_for_MAG, use_SCG_averages, use_highest_completion_score, dry_run.
        The same object is also handed to dbops.ContigsSuperclass.

    Raises
    ======
    ConfigError
        If the SCG options are contradictory or missing, if prefix / report file /
        collection names are missing or invalid, if the contigs database has no
        HMM sources, or if no SCG estimate is available for a bin when
        `use_highest_completion_score` is requested.

    Notes
    =====
    With `list_collections`, the available collections are listed and the process
    terminates through sys.exit().
    """

    A = lambda x: args.__dict__[x] if x in args.__dict__ else None
    profile_db_path = A('profile_db')
    contigs_db_path = A('contigs_db')
    list_collections = A('list_collections')
    collection_name_to_read = A('collection_to_read')
    collection_name_to_write = A('collection_to_write')
    prefix = A('prefix')
    report_file_path = A('report_file')
    call_MAGs = A('call_MAGs')
    min_completion_for_MAG = A('min_completion_for_MAG')
    max_redundancy_for_MAG = A('max_redundancy_for_MAG')
    size_for_MAG = A('size_for_MAG')
    use_SCG_averages = A('use_SCG_averages')
    use_highest_completion_score = A('use_highest_completion_score')
    dry_run = A('dry_run')

    # listing collections does not involve any completion estimate, so the SCG
    # options are only validated when bins are actually going to be renamed.
    if not list_collections:
        if use_SCG_averages and use_highest_completion_score:
            raise ConfigError("You can't use both `--use-SCG-averages` and `--use-highest-completion-score`")

        if not use_SCG_averages and not use_highest_completion_score:
            raise ConfigError("You must choose one: `--use-SCG-averages`, or`--use-highest-completion-score`")

    dbops.is_profile_db_and_contigs_db_compatible(profile_db_path, contigs_db_path)

    collections = ccollections.Collections()
    collections.populate_collections_dict(profile_db_path)

    if list_collections:
        collections.list_collections()
        sys.exit()

    if not prefix:
        raise ConfigError("Anvi'o is having hard time believing that you called this function without\
                            a prefix to rename bins in the collection '%s'." % collection_name_to_read)

    utils.is_this_name_OK_for_database('prefix for bins', prefix)

    if not report_file_path:
        raise ConfigError("You must provide an output file name to report file changes. It may or may not\
                            be useful to you, but let's don't take unnecessary risks, eh? (you can use the\
                            `--report-file` parameter)")

    filesnpaths.is_output_file_writable(report_file_path)

    # missing names are reported first: otherwise two omitted names (None == None)
    # would be reported as 'identical' collection names.
    if not collection_name_to_read:
        raise ConfigError("You must provide a collection name to read from.")

    if not collection_name_to_write:
        raise ConfigError("You must provide a new collection name to write updated bin names.")

    if collection_name_to_read == collection_name_to_write:
        raise ConfigError("Well :( Collection name to read from can't be identical to the collection name\
                            you want to use to store updated bin names. You know. It kinda defeats the purpose.")

    utils.is_this_name_OK_for_database('collection name two write', collection_name_to_write)

    if collection_name_to_read not in collections.collections_dict:
        raise ConfigError("%s is not a valid collection name, because it doesn't exist :(. See a\
                            list of available ones with '--list-collections' flag" % collection_name_to_read)

    if collection_name_to_write in collections.collections_dict:
        raise ConfigError("The new collection name %s is already in the database. You must choose a new\
                            collection name." % collection_name_to_write)

    completeness = Completeness(contigs_db_path)

    if not len(completeness.sources):
        raise ConfigError("HMM's were not run for this contigs database :/ Without that, how can this poor program can rename bins based on\
                            their completion estimates? :(")

    # exactly one of the two SCG options is set at this point (validated above)
    if use_SCG_averages:
        run.warning('As per your request, this run will use average completion and redundancy estimates\
                     recovered from all single-copy core gene collections.')

    elif use_highest_completion_score:
        run.warning('As per your request, this run will use the completion and redundancy estimate\
                     recovered from the single-copy core gene collection that provides the highest\
                     completion estimate.')

    contigs_db = dbops.ContigsSuperclass(args, r = run, p = progress)

    collection_dict = collections.get_collection_dict(collection_name_to_read)
    bins_info_dict = collections.get_bins_info_dict(collection_name_to_read)

    counter = 0
    MAGs_sorted_by_completion = []
    bins_sorted_by_completion = []
    total_num_bins = len(collection_dict)
    progress.new('Going through bins in the collection %s' % collection_name_to_read)
    for bin_name in collection_dict:
        counter += 1
        progress.update('%d in %d' % (counter, total_num_bins))

        split_names = set(collection_dict[bin_name])
        p_completion, p_redundancy, domain, domain_confidence, d = completeness.get_info_for_splits(split_names)

        if use_SCG_averages:
            # the averaged estimates returned by get_info_for_splits are used as is
            pass
        elif use_highest_completion_score:
            # `d` is indexed as d[domain][scg_collection_name]; collect one candidate per
            # SCG collection and keep the one with the highest completion. Ties are broken
            # by the tuple ordering (domain name, then collection name).
            candidates = [(d[domain_name][scg_collection_name]['percent_complete'], domain_name, scg_collection_name)
                          for domain_name in d
                          for scg_collection_name in d[domain_name]]

            if not candidates:
                progress.end()
                raise ConfigError("There is no single-copy core gene estimate for the bin '%s', so anvi'o "
                                  "can't pick the collection with the highest completion for it :/" % bin_name)

            _, best_domain, best_scg_collection = max(candidates)

            p_completion = d[best_domain][best_scg_collection]['percent_complete']
            p_redundancy = d[best_domain][best_scg_collection]['percent_redundancy']
        else:
            raise ConfigError("WTF :( Something that shuold never happened just happened,")

        size_in_Mbp = sum([contigs_db.splits_basic_info[split_name]['length'] for split_name in split_names]) / 1000000.0
        substantive_completion = p_completion - p_redundancy

        is_MAG = call_MAGs \
                 and p_redundancy < max_redundancy_for_MAG \
                 and (p_completion >= min_completion_for_MAG or size_in_Mbp >= size_for_MAG)

        if is_MAG:
            MAGs_sorted_by_completion.append((bin_name, substantive_completion, 'MAG'),)
        else:
            bins_sorted_by_completion.append((bin_name, substantive_completion, 'Bin'),)

    MAGs_sorted_by_completion.sort(key=operator.itemgetter(1), reverse=True)
    bins_sorted_by_completion.sort(key=operator.itemgetter(1), reverse=True)
    progress.end()

    new_collection_dict = {}
    new_bins_info_dict = {}

    # `counter` ends up as the number of renamed bins (stays 0 for an empty collection)
    counter = 0

    # the context manager makes sure the report is flushed and closed even if
    # something goes wrong while bins are being renamed.
    with open(report_file_path, 'w') as report:
        report.write('old_bin_name\tnew_bin_name\n')
        for counter, (bin_name, _, bin_type) in enumerate(MAGs_sorted_by_completion + bins_sorted_by_completion, start=1):
            new_bin_name = '%s_%s_%05d' % (prefix, bin_type, counter)
            new_collection_dict[new_bin_name] = copy.deepcopy(collection_dict[bin_name])

            if bin_name in bins_info_dict:
                new_bins_info_dict[new_bin_name] = copy.deepcopy(bins_info_dict[bin_name])

            report.write('%s\t%s\n' % (bin_name, new_bin_name))

    run.info('Report', '%s' % (report_file_path))

    if dry_run:
        run.warning('This was a dry run. So nothing is updated in the profile database, but now there is\
                     a fancy report file that shows how your bins would have been renamed. Please take a\
                     look at it and make sure things seem just like the way you want. Once you are satisfied\
                     you can run the program without the --dry-run flag.')

        return

    collections_table = dbops.TablesForCollections(profile_db_path)
    collections_table.append(collection_name_to_write, new_collection_dict, new_bins_info_dict)

    run.info('Rename', 'All %d bins in the collection "%s" is renamed, and stored under the collection\
                        name "%s".' % (counter, collection_name_to_read, collection_name_to_write))


def float_in_range(low, high):
    """Return an argparse `type` callable that accepts floats within [low, high].

    The original parser combined `type=float` with an explicit list of `choices`,
    which rejected legitimate fractional values (i.e., 92.5) and produced error
    messages that listed every single choice. The callable returned here accepts
    any value in the closed interval instead (which includes every value the
    former lists of choices contained).

    Parameters
    ==========
    low, high : numbers
        Inclusive lower and upper bounds.

    Returns
    =======
    callable
        Takes the command line string, returns it as a float, and raises
        argparse.ArgumentTypeError if it is not a number or is out of range
        (NaN fails the range comparison and is therefore rejected, too).
    """

    import argparse

    def parse(value):
        try:
            number = float(value)
        except (TypeError, ValueError):
            raise argparse.ArgumentTypeError("'%s' is not a number" % (value,))

        if not low <= number <= high:
            raise argparse.ArgumentTypeError("'%s' is not between %s and %s" % (value, low, high))

        return number

    return parse


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Rename all bins in a given collection (so they have pretty names).')

    groupA = parser.add_argument_group('DEFAULT INPUTS', "Standard stuff")
    groupB = parser.add_argument_group('OUTPUT AND TESTING', "a.k.a, sweet parameters of convenience")
    groupC = parser.add_argument_group('MAG OPTIONS', "If you want to call some bins 'MAGs' because you are so cool")
    groupD = parser.add_argument_group('SCG OPTIONS', "Options related to single-copy genes. How should SCGs utilized to\
                                                       say what should be considered a MAG?")
    groupA.add_argument(*anvio.A('contigs-db'), **anvio.K('contigs-db'))
    groupA.add_argument(*anvio.A('profile-db'), **anvio.K('profile-db'))
    groupA.add_argument('--collection-to-read', default = None,
                  help = "Collection name to read from. Anvi'o will not overwrite an existing collection, instead,\
                          it will create a copy of your collection with new bin names.")
    groupA.add_argument('--collection-to-write', default = None,
                  help = "The new collection name. Give it a nice, fancy name.")

    groupB.add_argument('--prefix', default = None,
                  help = "Prefix for the bin names. Must be a single word, composed\
                          of digits and numbers. The use of the underscore character is OK,\
                          but that's about it (fine, the use of the dash character is OK, too\
                          but no more!). If the prefix is 'PREFIX', each bin will be renamed\
                          as 'PREFIX_XXX_00001, PREFIX_XXX_00002', and so on, in the order of\
                          percent completion minus percent redundancy (what we call, 'substantive\
                          completion'). The 'XXX' part will either be 'Bin', or 'MAG depending on\
                          other parameters you use. Keep reading.")
    groupB.add_argument('--report-file', metavar = 'REPORT_FILE_PATH', default = None, \
                        help = "This file will report each name change event, so you can trace back\
                                the original names of renamed bins later.")
    groupB.add_argument(*anvio.A('list-collections'), **anvio.K('list-collections'))
    groupB.add_argument(*anvio.A('dry-run'), **anvio.K('dry-run', {'help': 'When used does NOT update the\
                                profile database, just creates the report file so you can view how things\
                                will be renamed.'}))

    groupC.add_argument('--call-MAGs', default=False, action="store_true", \
                        help = "This program by default rename your bins as 'PREFIX_Bin_00001', 'PREFIX_Bin_00002'\
                                and so on. If you use this flag, it will name the ones that meet the criteria\
                                described by MAG-related flags as 'PREFIX_MAG_00001', 'PREFIX_MAG_00002', and\
                                so on. The ones that do not get to be named as MAGs will remain as bins.")
    # the numeric MAG thresholds are validated as ranges rather than as lists of
    # choices, so fractional values such as 92.5 are accepted.
    groupC.add_argument('--min-completion-for-MAG', default=70, type=float_in_range(0, 100), metavar='[0-100]', \
                        help="If --call-MAGs flag is used, call any bin a 'MAG' if their completion estimate is\
                              above this (the default is %(default)d), and the redundancy estimate is\
                              less than --max-redundancy-for-MAG.")
    groupC.add_argument('--max-redundancy-for-MAG', default=10, type=float_in_range(0, 100), metavar='[0-100]', \
                        help="If --call-MAGs flag is used, call any bin a 'MAG' if their redundancy estimate is\
                              below this (the default is %(default)d) and the completion estimate is above\
                              --min-completion-for-MAG.")
    groupC.add_argument('--size-for-MAG', default=2, type=float_in_range(0.1, 10), metavar='0.1-10 Mbp', \
                        help="If --call-MAGs flag is used, call any bin a 'MAG' if their redundancy estimate\
                              is less than --max-redundancy-for-MAG, and the size is larger than this (the\
                              default is %(default)d Mbp), regarldless of the completion.")

    groupD.add_argument('--use-SCG-averages', default=False, action="store_true", \
                        help = "If you use this flag, anvi'o will use all avialble single-copy core gene collections, will\
                                average their independent completion and redundancy estimates, will select the best matching\
                                domain (i.e. arcaheal SCGs vs bacterial SCG), and use the resulting estimates numbers for\
                                renaming purposes.")
    groupD.add_argument('--use-highest-completion-score', default=False, action="store_true", \
                        help = "If you use this flag, instead of `--use-SCG-averages`, anvi'o will take the SCG collection\
                                that estimates the highest completion for a given bin. This will affect how MAGs are called.\
                                For instance, if you have HMM hits in your contigs database for both bacterial and archaeal\
                                single-copy gene collections, the completion score for a given archaeal bin with a bacterial\
                                single-copy genes will be very low. This flag will select the collection with the highest\
                                completion estimate, and use that instead of taking the average.")
    args = parser.parse_args()

    # both error types are handled the same way: print the message, exit with a
    # non-zero status.
    try:
        main(args)
    except (ConfigError, FilesNPathsError) as e:
        print(e)
        sys.exit(-1)
