#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""A script to call CONCOCT clustering on a merged anvi'o profile"""

import sys

import anvio
import anvio.utils as utils
import anvio.concoct as concoct
import anvio.filesnpaths as filesnpaths

from anvio.errors import ConfigError, FilesNPathsError


__author__ = "Developers of anvi'o (see AUTHORS.txt)"
__copyright__ = "Copyleft 2015-2018, the Meren Lab (http://merenlab.org/)"
__credits__ = ["Christopher Quince"]
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"
__resources__ = [("Original CONCOCT paper by Alneberg et al.", "https://www.nature.com/articles/nmeth.3103"),
                 ("An example usage of the program in the context of the Infant Gut Dataset by Sharon et al.", "http://merenlab.org/tutorials/infant-gut/#from-fragmentation-to-conflation-error-a-meren-lab-heuristic-to-fight-back"),
                 ("The use of this program in the context of initial binning of TARA population genomes", "http://merenlab.org/data/2017_Delmont_et_al_HBDs/#initial-automated-binning-with-concoct")]
__tags__ = ["profile_db", "clustering", "collections"]


if __name__ == '__main__':
    # Command-line entry point. The flow below is:
    #   1. parse the arguments,
    #   2. validate the collection name and the output path up front, so the
    #      clustering is not started only to fail at the reporting step,
    #   3. run CONCOCT clustering,
    #   4. report the clusters as a TAB-delimited file and/or store them in the
    #      database under the collection name.
    # A ConfigError ends the program with exit status -1, and a
    # FilesNPathsError with exit status -2.
    import argparse

    parser = argparse.ArgumentParser(description="A program to cluster items in a merged anvi'o profile using CONCOCT, and optionally\
                                                  creating a collection in the profile database. This is especially useful if you need to\
                                                  have more control over the number of clusters to work with if you are planning to refine\
                                                  them manually later.")

    parser.add_argument(*anvio.A('profile-db'), **anvio.K('profile-db'))
    parser.add_argument(*anvio.A('contigs-db'), **anvio.K('contigs-db'))
    parser.add_argument(*anvio.A('output-file'), **anvio.K('output-file'))
    parser.add_argument(*anvio.A('skip-store-in-db'), **anvio.K('skip-store-in-db'))
    # the collection name defaults to 'CONCOCT' when the user does not provide one
    parser.add_argument(*anvio.A('collection-name'), **anvio.K('collection-name', {'default': 'CONCOCT'}))
    parser.add_argument(*anvio.A('num-clusters-requested'), **anvio.K('num-clusters-requested'))

    args = anvio.get_args(parser)

    try:
        collection_name = args.collection_name.strip()
        if not collection_name:
            raise ConfigError('Nice try. Collection name cannot be empty')

        try:
            utils.check_sample_id(collection_name)
        except Exception:
            # Any failure of the check is reported with a message specific to
            # collection names. `Exception` (rather than a bare `except`) is
            # caught on purpose, so that KeyboardInterrupt and SystemExit are
            # not turned into a misleading "improper name" error.
            raise ConfigError('"%s" is not a proper collection name. A proper one should be a single word and not contain\
                                ANY characters but digits, ASCII letters and underscore character(s). There should not be\
                                any space characters, and the collection name should not start with a digit.' % collection_name)

        # make sure output file is writable before the analysis...
        if args.output_file:
            filesnpaths.is_output_file_writable(args.output_file)

        c = concoct.CONCOCT(args)
        c.cluster()

        # the two reporting targets are independent of each other: either, both,
        # or neither of them may be requested
        if args.output_file:
            c.store_clusters_as_TAB_delimited_text(args.output_file)
        if not args.skip_store_in_db:
            c.store_clusters_in_db(collection_name=collection_name)

    except ConfigError as e:
        print(e)
        sys.exit(-1)
    except FilesNPathsError as e:
        print(e)
        sys.exit(-2)
