#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Generator of genome storages"""

import sys

import anvio
import anvio.panops as panops
import anvio.terminal as terminal

from anvio.errors import ConfigError, FilesNPathsError


# Module-level metadata for this script; __version__ mirrors the version
# reported by the imported anvio package rather than being set here.
__author__ = "A. Murat Eren"
__copyright__ = "Copyright 2016, The anvio Project"
__credits__ = []
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"


# Terminal reporting helpers, created once at import time and handed to
# panops.GenomeStorage when the file is executed as a script.
run, progress = terminal.Run(), terminal.Progress()


if __name__ == '__main__':
    # Command-line entry point: parse the arguments, then ask
    # panops.GenomeStorage to create the genomes data storage.
    # argparse is only needed when the file is run as a script.
    import argparse

    parser = argparse.ArgumentParser(description="Create a genome storage from internal or external genomes for a pan genome analysis.")

    # Output file. The positional and keyword arguments for add_argument are
    # supplied by anvio.A / anvio.K for the 'output-file' entry; the extra
    # {'required': True} dict is handed to anvio.K (presumably to make the
    # flag mandatory -- confirm against anvio.K).
    groupC = parser.add_argument_group("OUTPUT", "Give it a nice name. Must end with '-GENOMES.h5'. This is primarily due to the fact\
                                                  that there are other .h5 files used throughout anvi'o and it would be better to\
                                                  distinguish this very special file from them.")
    groupC.add_argument(*anvio.A('output-file'), **anvio.K('output-file', {'required': True}))

    # External genomes. The flag defaults to None, so the parser itself does
    # not require it.
    groupA = parser.add_argument_group('EXTERNAL GENOMES', "External genomes listed as anvi'o contigs databases. As in, you have one\
                                                    or more genomes say from NCBI you want to work with, and you created an\
                                                    anvi'o contigs database for each one of them.")
    groupA.add_argument('-e', '--external-genomes', metavar = 'FILE', default = None,
                        help = "A two-column TAB-delimited flat text file that lists anvi'o contigs databases. The first item\
                                in the header line should read 'name', and the second should read 'contigs_db_path'. Each line in the\
                                file should describe a single entry, where the first column is the name of the genome (or MAG), and\
                                the second column is the anvi'o contigs database generated for this genome.")

    # Internal genomes. Also optional at the parser level (default None).
    # The help text states five columns, matching the five names it lists.
    groupB = parser.add_argument_group("INTERNAL GENOMES", "Genome bins stored in anvi'o profile databases as collections.")
    groupB.add_argument('-i', '--internal-genomes', metavar = 'FILE', default = None,
                        help = "A five-column TAB-delimited flat text file. The header line must contain these columns: 'name', 'bin_id',\
                                'collection_id', 'profile_db_path', 'contigs_db_path'. Each line should list a single entry, where 'name'\
                                can be any name to describe the anvi'o bin identified as 'bin_id' that is stored in a collection.")

    args = parser.parse_args()

    # The two anvi'o error types are reported by printing the exception and
    # exiting with a distinct non-zero status for each; any other exception
    # propagates unchanged.
    try:
        pan = panops.GenomeStorage(args, run, progress)
        pan.create_genomes_data_storage()
    except ConfigError as e:
        print(e)
        sys.exit(-1)
    except FilesNPathsError as e:
        print(e)
        sys.exit(-2)
