#!/usr/bin/env python

# Copyright (C) 2017, Weizhi Song, Torsten Thomas.
# songwz03@gmail.com or t.thomas@unsw.edu.au

# MetaCHIP is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# MetaCHIP is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import sys
import copy
import argparse
from MetaCHIP.PI import PI
from MetaCHIP import BP
from datetime import datetime
from MetaCHIP import MetaCHIP_config
from MetaCHIP import filter_HGT
from MetaCHIP import update_hmms
from MetaCHIP import get_SCG_tree
from MetaCHIP import SankeyTaxon
from MetaCHIP.MetaCHIP_config import config_dict


# Developer to-do notes kept as a module-level string; nothing in the code
# visible in this file reads `to_do`, so it serves as documentation only.
to_do = '''

1. not print if disabled: [2018-12-09 21:54:32] Plotting flanking regions with 16 cores
2. steps move to PG.py: uclust, get species tree
3. if no PG validated, skip plot, but not report error
4. move flk plot to the last step
5. get file list by RE is very slow (put files to delete into a tmp folder, then delete the folder)

'''


def version():
    """Return the MetaCHIP version string.

    Reads the first line of the ``VERSION`` file found in the directory
    given by ``MetaCHIP_config.config_file_path`` and returns it with
    leading/trailing whitespace removed.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the line has been read instead of being left to the garbage collector.
    """
    with open('%s/VERSION' % MetaCHIP_config.config_file_path) as version_file:
        return version_file.readline().strip()


def print_main_help():
    """Print the top-level usage summary, with the current version in the banner."""

    # The single %s placeholder in the banner line is filled with version().
    usage_template = ''' 
            ...::: MetaCHIP v%s :::...
        
    Core modules:
       PI             ->    Prepare input files 
       BP             ->    Run Best-match and Phylogenetic approaches
       
    Supplementary modules:
       CMLP           ->    Combine multi-level predictions (part of BP module)
       filter_HGT     ->    Get HGTs predicted at least n levels (for multi-level predictions)
       update_hmms    ->    update hmm profiles used for inferring SCG tree
       get_SCG_tree   ->    Get SCG protein tree
       SankeyTaxon    ->    Visualize taxonomic classification with Sankey plot
       plot_circos    ->    [to be added] Visualize gene flow with circos plot
       rename_cotig   ->    [to be added] rename contigs if their ids longer than 22 letters
       
    # for command specific help
    MetaCHIP PI -h
    MetaCHIP BP -h

    '''

    print(usage_template % version())


if __name__ == '__main__':

    # Command-line entry point: build one argparse sub-parser per MetaCHIP
    # module, parse the command line, then dispatch to the selected module.

    ############################################## initialize subparsers ###############################################

    # initialize the options parser
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(help="--", dest='subparser_name')

    PI_parser =             subparsers.add_parser('PI',             description='Prepare input files',                                  epilog='Example: MetaCHIP PI -h')
    BP_parser =             subparsers.add_parser('BP',             description='BM and PG approach',                                   epilog='Example: MetaCHIP BP -h')
    CMLP_parser =           subparsers.add_parser('CMLP',           description='Combine multiple level predictions',                   epilog='Example: MetaCHIP CMLP -h')
    filter_HGT_parser =     subparsers.add_parser('filter_HGT',     description='get HGTs detected at least n levels',                  usage=filter_HGT.filter_HGT_usage)
    update_hmms_parser =    subparsers.add_parser('update_hmms',    description='update hmm profiles',                                  usage=update_hmms.update_hmms_usage)
    get_SCG_tree_parser =   subparsers.add_parser('get_SCG_tree',   description='get SCG tree',                                         usage=get_SCG_tree.get_SCG_tree_usage)
    SankeyTaxon_parser =    subparsers.add_parser('SankeyTaxon',    description='Visualize taxonomic classification with Sankey plot',  usage=SankeyTaxon.SankeyTaxon_parser_usage)


    ######################################### define arguments for subparsers ##########################################

    # add arguments for PI_parser
    PI_parser.add_argument('-i',                        required=True,                          help='input genome folder')
    PI_parser.add_argument('-taxon',                    required=False,                         help='taxonomic classification')
    PI_parser.add_argument('-p',                        required=True,                          help='output prefix')
    PI_parser.add_argument('-r',                        required=False, default=None,           help='grouping rank, choose from p (phylum), c (class), o (order), f (family), g (genus) or any combination of them')
    PI_parser.add_argument('-g',                        required=False, default=None,           help='grouping file')
    PI_parser.add_argument('-x',                        required=False, default='fasta',        help='file extension')
    PI_parser.add_argument('-grouping_only',            required=False, action="store_true",    help='run grouping only, deactivate Prodigal and Blastn')
    PI_parser.add_argument('-nonmeta',                  required=False, action="store_true",    help='annotate Non-metagenome-assembled genomes (Non-MAGs)')
    PI_parser.add_argument('-noblast',                  required=False, action="store_true",    help='not run all-vs-all blastn')
    PI_parser.add_argument('-t',                        required=False, type=int, default=1,    help='number of threads, default: 1')
    PI_parser.add_argument('-blastn_js_header',         required=False,                         help='speed up all-against-all blastn with separated job script for each of the input genome, provide the job script header here')
    PI_parser.add_argument('-qsub',                     required=False, action="store_true",    help='specify to automatically submit generated job scripts, otherwise, submit them manually')
    PI_parser.add_argument('-quiet',                    required=False, action="store_true",    help='not report progress')
    PI_parser.add_argument('-tmp',                      required=False, action="store_true",    help='keep temporary files')

    # add arguments for BP_parser
    BP_parser.add_argument('-p',                        required=True,                          help='output prefix')
    BP_parser.add_argument('-r',                        required=False, default=None,           help='grouping rank, choose from p (phylum), c (class), o (order), f (family), g (genus) or any combination of them')
    BP_parser.add_argument('-g',                        required=False, default=None,           help='grouping file')
    BP_parser.add_argument('-cov',                      required=False, type=int,   default=75, help='coverage cutoff, default: 75')
    BP_parser.add_argument('-al',                       required=False, type=int,   default=200,help='alignment length cutoff, default: 200')
    BP_parser.add_argument('-flk',                      required=False, type=int,   default=10, help='the length of flanking sequences to plot (Kbp), default: 10')
    BP_parser.add_argument('-ip',                       required=False, type=int,   default=90, help='identity percentile cutoff, default: 90')
    BP_parser.add_argument('-ei',                       required=False, type=float, default=90, help='end match identity cutoff, default: 90')
    BP_parser.add_argument('-t',                        required=False, type=int,   default=1,  help='number of threads, default: 1')
    BP_parser.add_argument('-plot_iden',                required=False, action="store_true",    help='plot identity distribution')
    BP_parser.add_argument('-NoEbCheck',                required=False, action="store_true",    help='disable end break and contig match check for fast processing, not recommend for metagenome-assembled genomes (MAGs)')
    BP_parser.add_argument('-force',                    required=False, action="store_true",    help='overwrite previous results')
    BP_parser.add_argument('-quiet',                    required=False, action="store_true",    help='Do not report progress')
    BP_parser.add_argument('-tmp',                      required=False, action="store_true",    help='keep temporary files')

    # add arguments for CMLP_parser
    CMLP_parser.add_argument('-p',                      required=True,                          help='output prefix')
    CMLP_parser.add_argument('-r',                      required=False, default=None,           help='grouping rank, choose from p (phylum), c (class), o (order), f (family), g (genus) or any combination of them')
    CMLP_parser.add_argument('-cov',                    required=False, type=int,   default=75, help='coverage cutoff, default: 75')
    CMLP_parser.add_argument('-al',                     required=False, type=int,   default=200,help='alignment length cutoff, default: 200')
    CMLP_parser.add_argument('-flk',                    required=False, type=int,   default=10, help='the length of flanking sequences to plot (Kbp), default: 10')
    CMLP_parser.add_argument('-ip',                     required=False, type=int,   default=90, help='identity percentile cutoff, default: 90')
    CMLP_parser.add_argument('-ei',                     required=False, type=float, default=90, help='end match identity cutoff, default: 90')
    CMLP_parser.add_argument('-t',                      required=False, type=int,   default=1,  help='number of threads, default: 1')

    # add arguments for filter_HGT_parser
    filter_HGT_parser.add_argument('-i',                required=True,                          help='txt file containing detected HGTs, e.g. [prefix]_[ranks]_detected_HGTs.txt ')
    filter_HGT_parser.add_argument('-n',                required=True, type=int,                help='HGTs detected at least n levels, 2 <= n <= 5')
    filter_HGT_parser.add_argument('-plot',             required=False,                         help='flanking plots folder')

    # add arguments for update_hmm
    update_hmms_parser.add_argument('-hmm',             required=True,                          help='MetaCHIP_phylo.hmm file')
    update_hmms_parser.add_argument('-p_db',            required=False, default=None,           help='Pfam db file, e.g. Pfam-A.hmm')
    update_hmms_parser.add_argument('-t_db',            required=False, default=None,           help='TIGRFAMs db folder, e.g. TIGRFAMs_14.0_HMM')

    # add arguments for get_SCG_tree
    get_SCG_tree_parser.add_argument('-i',              required=True,                          help='input genome folder')
    get_SCG_tree_parser.add_argument('-p',              required=True,                          help='output prefix')
    get_SCG_tree_parser.add_argument('-x',              required=False, default='fasta',        help='file extension')
    get_SCG_tree_parser.add_argument('-nonmeta',        required=False, action="store_true",    help='annotate Non-metagenome-assembled genomes (Non-MAGs)')
    get_SCG_tree_parser.add_argument('-t',              required=False, type=int, default=1,    help='number of threads, default: 1')

    # add arguments for SankeyTaxon
    SankeyTaxon_parser.add_argument('-taxon',           required=True,                          help='taxon classification results')
    SankeyTaxon_parser.add_argument('-r',               required=True,                          help='taxon ranks to plot, e.g. dpcofgs, pco, pcf, cfs')
    SankeyTaxon_parser.add_argument('-p',               required=True,                          help='output prefix')
    SankeyTaxon_parser.add_argument('-ec',              required=False, action='store_true',    help='only plot explicit classifications')
    SankeyTaxon_parser.add_argument('-x',               required=False, type=int,               help='plot width')
    SankeyTaxon_parser.add_argument('-y',               required=False, type=int,               help='plot height')


    ############################## parse provided arguments and run corresponding function #############################

    # With no arguments, or an explicit top-level help flag, show the custom
    # overview instead of argparse's auto-generated help, then exit cleanly.
    if (len(sys.argv) == 1) or (sys.argv[1] in ('-h', '-help', '--help')):
        print_main_help()
        sys.exit(0)

    # argparse Namespace -> plain dict, the form the module functions are called with
    args = vars(parser.parse_args())
    selected_module = args['subparser_name']

    time_format = '[%Y-%m-%d %H:%M:%S]'


    #################### run PI module ####################

    if selected_module == 'PI':

        if args['g'] is not None:
            # a customized grouping file takes precedence over grouping ranks
            PI(args, config_dict)

        else:
            detection_ranks_str = args['r']

            # Neither -g nor a non-empty -r: report a usage error rather than
            # failing later with "object of type 'NoneType' has no len()".
            if not detection_ranks_str:
                PI_parser.error('please provide grouping rank(s) with -r or a grouping file with -g')

            # for single level detection
            if len(detection_ranks_str) == 1:
                PI(args, config_dict)

            # for multiple level prediction
            else:
                for rank_index, detection_rank_PI in enumerate(detection_ranks_str):
                    current_rank_args_PI = copy.deepcopy(args)
                    current_rank_args_PI['r'] = detection_rank_PI

                    # the first rank is run with grouping_only switched off;
                    # every later rank is run with grouping_only switched on
                    current_rank_args_PI['grouping_only'] = rank_index > 0
                    PI(current_rank_args_PI, config_dict)


    #################### run BP module ####################

    elif selected_module == 'BP':

        if args['g'] is not None:

            BP.BM(args, config_dict)
            BP.PG(args, config_dict)
            BP.combine_multiple_level_predictions(args, config_dict)

        else:
            detection_ranks_str = args['r']

            # same guard as for PI: a missing/empty -r without -g is a usage error
            if not detection_ranks_str:
                BP_parser.error('please provide grouping rank(s) with -r or a grouping file with -g')

            # for single level detection
            if len(detection_ranks_str) == 1:
                BP.BM(args, config_dict)
                BP.PG(args, config_dict)

            # for multiple level prediction
            else:
                for detection_rank_BP in detection_ranks_str:
                    current_rank_args_BP = copy.deepcopy(args)
                    current_rank_args_BP['r'] = detection_rank_BP
                    # per-rank runs are forced quiet; only the line below is printed here
                    current_rank_args_BP['quiet'] = True

                    print('%s Detect HGT at level: %s' % ((datetime.now().strftime(time_format)), detection_rank_BP))
                    BP.BM(current_rank_args_BP, config_dict)
                    BP.PG(current_rank_args_BP, config_dict)

            # combine multiple level predictions (called with the original,
            # un-split arguments for both the single- and multi-rank case)
            BP.combine_multiple_level_predictions(args, config_dict)

    #################### run supplementary modules ####################

    elif selected_module == 'CMLP':
        BP.CMLP(args, config_dict)

    elif selected_module == 'filter_HGT':
        filter_HGT.filter_HGT(args)

    elif selected_module == 'update_hmms':
        update_hmms.update_hmms(args)

    elif selected_module == 'get_SCG_tree':
        get_SCG_tree.get_SCG_tree(args, config_dict)

    elif selected_module == 'SankeyTaxon':
        SankeyTaxon.SankeyTaxon(args)
