#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""anvi-split-locus, first of its name, splitter of loci, and generator of mini contigs databases

  You want to play with it? This is how you could quickly test it:

  Download the anvi'o data pack for the infant gut data, which is here:

    https://ndownloader.figshare.com/files/8252861

  Unpack it and go into it:

    tar -zxvf INFANTGUTTUTORIAL.tar.gz && cd INFANT-GUT-TUTORIAL

  Then run the program `anvi-export-locus` in the anvi'o master this way:

    anvi-export-locus -c CONTIGS.db \
                      -O OUTPUT \
                      --use-hmm \
                      --hmm-sources Campbell_et_al \
                      --search-term Ribosomal_L27 \
                      -n 24

  This will give you a new contigs database with a single contig that contains the Ribosomal_L27 gene and
  the 24 genes that follow it.

  If all you want is the sequence of a gene matching a search pattern then use anvi-get-sequences-for-hmm-hits
  for HMM searches, and anvi-search-functions for searches in functional annotations.
"""

import sys
import argparse

import anvio
import anvio.terminal as terminal

from anvio.splitter import LocusSplitter
from anvio.errors import ConfigError, FilesNPathsError


# Module-level metadata for this program. The version is taken from the
# installed anvio package rather than being hard-coded here.
# NOTE(review): these dunder assignments are kept one-per-line in plain
# `name = literal` form; presumably anvi'o tooling reads them to describe its
# programs -- confirm before reformatting them.
__author__ = "Developers of anvi'o (see AUTHORS.txt)"
__copyright__ = "Copyleft 2015-2018, the Meren Lab (http://merenlab.org/)"
__credits__ = []
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"
# Presumably the anvi'o artifact names this program consumes (`__requires__`)
# and produces (`__provides__`) -- TODO confirm against the anvi'o docs.
__requires__ = ['contigs-db']
__provides__ = ['locus-fasta']


# Module-level terminal helpers created from anvio.terminal. Neither object is
# referenced again in the visible part of this script.
run = terminal.Run()
progress = terminal.Progress()


if __name__ == '__main__':
    # Command-line entry point: declare the arguments, parse them through
    # anvio.get_args(), then hand the resulting namespace to LocusSplitter.
    #
    # Most options are declared via anvio.A(name) / anvio.K(name), whose return
    # values are unpacked as the positional and keyword arguments of
    # add_argument(); presumably these are anvi'o's shared argument
    # definitions -- see the anvio package for what each one expands to.
    #
    # NOTE: the help/description strings below use backslash continuations
    # inside the literals, so the indentation of the continuation lines is part
    # of each string. They are intentionally left untouched.
    parser = argparse.ArgumentParser(description="This program helps you cut a 'locus' from a larger context. Anvi'o does this by \
                                                  starting from one gene, and extending its selection based on how many genes you \
                                                  want from its upstream or downstream in the contig its found. The first gene anvi'o \
                                                  focuses on is defined by you: you can either use a function name in any of the functional \
                                                  annotation databases or default or user-defined HMMs, or use a gene callers ID. If \
                                                  everything goes alright, you will get a separate contigs database for every matching \
                                                  gene anvi'o found in your master contigs database. Enjoy your mini contigs databases.")

    # Inputs that are always needed: the contigs database and the size of the
    # gene block. --num-genes stays a string here because it may hold either
    # one number or a comma-separated pair (see its help text).
    groupA = parser.add_argument_group('Essential INPUT')
    groupA.add_argument(*anvio.A('contigs-db'), **anvio.K('contigs-db'))
    groupA.add_argument('-n', '--num-genes', type=str, help='For each match (to the function, or HMM that was searched) a sequence which includes \
                            a block of genes will be saved. The block could include either genes only in the forward direction of the gene (defined \
                            according to the direction of transcription of the gene) or reverse or both. \
                            If you wish to get both direction use a comma (no spaces) to define the block \
                            For example, \"-n 4,5\" will give you four genes before and five genes after. \
                            Whereas, \"-n 5\" will give you five genes after (in addition to the gene that matched). \
                            To get only genes preceeding the match use \"-n 5,0\". \
                            If the number of genes requested exceeds the length of the contig, then the output \
                            will include the sequence until the end of the contig.', required=True)

    # Two alternative ways of choosing the anchor gene: a search term ...
    groupB = parser.add_argument_group('Additional essential INPUT - OPTION 1', "Search according to either HMM or functional annotations")
    groupB.add_argument('-s', '--search-term', help='Search term.')

    # ... or explicit gene caller ids.
    groupC = parser.add_argument_group('Additional essential INPUT - OPTION 2', "Search specific gene id's")
    groupC.add_argument(*anvio.A('gene-caller-ids'), **anvio.K('gene-caller-ids'))
    groupC.add_argument(*anvio.A('delimiter'), **anvio.K('delimiter'))

    # Output location. The second argument to anvio.K() marks the output file
    # prefix as required for this program.
    groupD = parser.add_argument_group('THE OUTPUT', "Where should the output go. It will be one FASTA file with all matches \
                                       or one FASTA per match (see --separate-fasta)")
    groupD.add_argument(*anvio.A('output-dir'), **anvio.K('output-dir'))
    groupD.add_argument(*anvio.A('output-file-prefix'), **anvio.K('output-file-prefix', {'required': True}))

    # Optional switches and parameters.
    groupE = parser.add_argument_group('ADDITIONAL STUFF', "Flags and parameters you can set according to your need")
    groupE.add_argument('--separate-fasta', default=False, action='store_true', help='Split each match to a separate FASTA file.')
    groupE.add_argument('--use-hmm', default=False, action='store_true', help='Use HMM hits instead of functional annotations. \
                            If you choose this option, you must also say which HMM source to use.')
    groupE.add_argument(*anvio.A('hmm-sources'), **anvio.K('hmm-sources'))
    groupE.add_argument(*anvio.A('list-hmm-sources'), **anvio.K('list-hmm-sources'))
    # Registered on groupE like its neighbours (it used to be added to the bare
    # parser), so that --help lists it under 'ADDITIONAL STUFF' with the other
    # search-related options. The flag itself and its parsing are unchanged.
    groupE.add_argument(*anvio.A('annotation-sources'), **anvio.K('annotation-sources'))
    groupE.add_argument(*anvio.A('overwrite-output-destinations'), **anvio.K('overwrite-output-destinations'))
    groupE.add_argument(*anvio.A('remove-partial-hits'), **anvio.K('remove-partial-hits'))
    groupE.add_argument(*anvio.A('never-reverse-complement'), **anvio.K('never-reverse-complement'))

    args = anvio.get_args(parser)

    try:
        locus_splitter = LocusSplitter(args)
        locus_splitter.process()
    except (ConfigError, FilesNPathsError) as e:
        # Both anvi'o error types are reported the same way: print the error
        # and terminate with a non-zero exit status.
        print(e)
        sys.exit(-1)
