#!/usr/bin/env python
# -*- coding: utf-8
"""A script to export a FASTA file and the coverage table from a merged database.

The purpose of this script is to export two critical files for most genome binning
software: the sequences file for tetra-nucleotide analysis, and the coverage table
that shows the coverage of each contig across samples. These output files can be
used to identify bins, and those bins can then be used to populate the collections
table via the parsers available in anvi-populate-collections."""

import os
import sys
import argparse

import anvio
import anvio.dbops as dbops
import anvio.utils as utils
import anvio.terminal as terminal
import anvio.filesnpaths as filesnpaths

from anvio.errors import ConfigError


__author__ = "A. Murat Eren"
__copyright__ = "Copyright 2015, The anvio Project"
__credits__ = []
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"


# Module-level terminal reporter; main() uses it to print the paths of the files it writes.
run = terminal.Run()

def main(args):
    """Export the coverage table and the split sequences of a merged profile database.

    Two files are written into the output directory:

      - '<prefix>-COVs.txt': a TAB-delimited file built from the 'mean_coverage_contigs'
        table of the profile database, with a 'contig' column followed by one column
        per sample (sample names sorted).
      - '<prefix>-SPLITS.fa': sequences exported from the contigs database (in splits
        mode) for every name found in that coverage table.

    `args` must provide `profile_db`, `contigs_db`, `output_dir` and
    `output_file_prefix`. When `output_dir` is empty, the directory that holds the
    profile database is used; when `output_file_prefix` is empty, the 'sample_id'
    stored in the profile database is used. Both fallbacks are written back to `args`.

    Raises ConfigError if the profile database is not flagged as merged.
    """
    dbops.is_profile_db_and_contigs_db_compatible(args.profile_db, args.contigs_db)

    merged_profile_db = dbops.ProfileDatabase(args.profile_db)

    # Everything that needs the open profile database lives inside this `try`, so the
    # connection is released on every path -- including the 'not merged' error below.
    try:
        # The explicit comparison is kept on purpose: only True (or 1) counts as merged,
        # which is narrower than a plain truthiness test.
        if merged_profile_db.meta['merged'] != True:
            raise ConfigError("'%s' does not seem to be a merged profile database :/" % args.profile_db)

        if args.output_dir:
            filesnpaths.gen_output_directory(args.output_dir)
        else:
            # no output directory requested: write next to the profile database
            args.output_dir = os.path.dirname(os.path.abspath(args.profile_db))

        if not args.output_file_prefix:
            args.output_file_prefix = merged_profile_db.meta['sample_id']

        coverages = merged_profile_db.db.get_table_as_dict('mean_coverage_contigs')

        # grab the sample names while the database object is still connected
        sample_names = sorted(merged_profile_db.samples)
    finally:
        merged_profile_db.disconnect()

    coverages_file = os.path.join(args.output_dir, args.output_file_prefix + '-COVs.txt')
    splits_fasta = os.path.join(args.output_dir, args.output_file_prefix + '-SPLITS.fa')

    utils.store_dict_as_TAB_delimited_file(coverages, coverages_file, ['contig'] + sample_names)
    utils.export_sequences_from_contigs_db(args.contigs_db, splits_fasta, seq_names_to_export=sorted(coverages.keys()), splits_mode=True)

    run.info('Coverages file', coverages_file)
    run.info('Sequences file', splits_fasta)


if __name__ == '__main__':
    # Command-line entry point: every option is one of anvi'o's shared argument
    # definitions, looked up by name through anvio.A (flags) and anvio.K (keywords).
    parser = argparse.ArgumentParser(description='Export splits and the coverage table from database')

    # registered in this order so the help output lists them the same way
    for arg_name in ('profile-db', 'contigs-db', 'output-dir', 'output-file-prefix'):
        parser.add_argument(*anvio.A(arg_name), **anvio.K(arg_name))

    args = parser.parse_args()

    try:
        main(args)
    except ConfigError as e:
        # configuration problems are reported to the user and signalled via the exit code
        print(e)
        sys.exit(-1)
