#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Return frequencies of amino acids in a gene

   Takes a bunch of BAM files, and a unique gene caller ID to count
   AA linkmer frequencies"""

import sys

import anvio
import anvio.utils as utils
import anvio.bamops as bamops
import anvio.terminal as terminal

from anvio.errors import ConfigError, FilesNPathsError
from anvio.dbops import ContigsSuperclass
from anvio.constants import codon_to_AA


__author__ = "A. Murat Eren"
__copyright__ = "Copyright 2015, The anvio Project"
__credits__ = []
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"


run = terminal.Run()
progress = terminal.Progress()


def _build_output_rows(aa_frequencies_dict, gene_call, amino_acids):
    """Flatten per-codon frequency entries into rows for the output table.

    Parameters
    ----------
    aa_frequencies_dict
        Mapping of codon order -> entry, where each entry provides the keys
        'reference', 'coverage' and 'frequencies' (the latter indexed by
        amino acid).
    gene_call
        Gene call record providing 'contig', 'start', 'stop' and 'direction'.
    amino_acids
        Amino acid names to copy out of each entry's 'frequencies'.

    Returns
    -------
    dict
        One row (a dict of column name -> value) per codon order.
    """
    # These columns are identical for every codon of the gene.
    gene_columns = {'contig_name': gene_call['contig'], 'start': gene_call['start'],
                    'stop': gene_call['stop'], 'direction': gene_call['direction']}

    rows = {}
    for codon_order in aa_frequencies_dict:
        entry = aa_frequencies_dict[codon_order]
        frequencies = entry['frequencies']

        row = {'reference': entry['reference'], 'coverage': entry['coverage']}
        row.update(gene_columns)
        row.update((aa, frequencies[aa]) for aa in amino_acids)

        rows[codon_order] = row

    return rows


def main(args):
    """Compute AA frequencies for one gene call and store them as a TAB-delimited file.

    `args` is handed to `ContigsSuperclass` as is, and must also carry
    `input_file` (the BAM file), `gene_caller_id` and `output_file`.

    Raises `ConfigError` if the gene caller ID is not in the contigs database,
    or if the gene call is flagged as partial.
    """
    c = ContigsSuperclass(args)
    c.init_contig_sequences()

    # Validate the request before touching the BAM file, so a bad gene caller
    # ID does not leave an opened file behind.
    if args.gene_caller_id not in c.genes_in_contigs_dict:
        # '%s' rather than '%d': a non-integer ID must still yield this
        # ConfigError instead of a TypeError from string formatting.
        raise ConfigError("The contigs db does not have the gene caller id '%s' :/" % args.gene_caller_id)

    gene_call = c.genes_in_contigs_dict[args.gene_caller_id]

    if gene_call['partial']:
        raise ConfigError('This seems to be a partial gene call. Sorry :/')

    bam_file_object = bamops.BAMFileObject(args.input_file).get()

    contig_sequence = c.contig_sequences[gene_call['contig']]['sequence']

    aa_frequencies = bamops.AAFrequencies()

    # Several codons map to the same amino acid; work with each one only once.
    # The same sorted list drives both the row contents and the header.
    amino_acids = sorted(set(codon_to_AA.values()))
    header = ['codon_order_in_gene', 'contig_name', 'start', 'stop', 'direction', 'reference', 'coverage'] + amino_acids

    progress.new('Busy code is busy')
    try:
        progress.update('Generating AA frequencies dict ...')
        aa_frequencies_dict = aa_frequencies.process_gene_call(bam_file_object, gene_call, contig_sequence)

        progress.update('Working on the output ...')
        d = _build_output_rows(aa_frequencies_dict, gene_call, amino_acids)

        progress.update('Storing output ...')
        utils.store_dict_as_TAB_delimited_file(d, args.output_file, header)
    finally:
        # End the progress display on failure as well as on success.
        progress.end()

    run.info('output', args.output_file)

if __name__ == '__main__':
    # Command-line entry point: build the parser, run `main`, and translate
    # known anvi'o errors into a printed message plus a non-zero exit code.
    import argparse

    cli = argparse.ArgumentParser(description='Frequencies of AA linkmers')
    cli.add_argument(
        '-i', '--input-file',
        metavar='INPUT_BAM',
        default=None,
        required=True,
        help='Sorted and indexed BAM file to analyze.',
    )

    # Arguments whose flags and options come from anvi'o's shared definitions,
    # paired with the extra positional arguments (if any) given to `anvio.K`.
    shared_arguments = (
        ('contigs-db', ()),
        ('gene-caller-id', ({'required': True},)),
        ('output-file', ({'required': True},)),
    )
    for param_id, overrides in shared_arguments:
        cli.add_argument(*anvio.A(param_id), **anvio.K(param_id, *overrides))

    cli_args = cli.parse_args()

    exit_status = 0
    try:
        main(cli_args)
    except ConfigError as config_problem:
        print(config_problem)
        exit_status = -1
    except FilesNPathsError as path_problem:
        print(path_problem)
        exit_status = -2

    # A clean run falls through without an explicit exit call.
    if exit_status != 0:
        sys.exit(exit_status)
