#! /usr/bin/env python
# -*- encoding: utf-8 -*-
from collections import deque
import logging
import random
from multiprocessing import Queue
import os
import signal
import shutil
import time

from configargparse import ArgParser
import pkg_resources
import psutil

import autosklearn.automl
from autosklearn.data.data_manager_factory import \
    populate_argparse_with_data_options

def namespace_to_automl_format(namespace):
    """Translate parsed command line options into a resampling setup.

    Reads ``namespace.resampling_strategy`` and, depending on its value,
    the fold-related attributes of ``namespace``.

    Returns a tuple ``(strategy, arguments)`` where ``strategy`` is the
    strategy name and ``arguments`` is either ``None`` (holdout variants)
    or a dict with the fold settings of the chosen strategy.

    Raises ``ValueError`` carrying the offending value if the strategy
    name is not one of the supported ones.
    """
    requested = namespace.resampling_strategy

    # Holdout variants take no additional arguments.
    if requested in ('holdout', 'holdout-iterative-fit'):
        return requested, None

    # Both cross-validation variants only need the number of folds.
    if requested in ('cv', 'partial-cv'):
        return requested, {'folds': namespace.folds}

    # Nested cross-validation needs separate inner and outer fold counts.
    if requested == 'nested-cv':
        fold_settings = {'inner_folds': namespace.inner_folds,
                         'outer_folds': namespace.outer_folds}
        return requested, fold_settings

    raise ValueError(requested)

def get_options():
    """Build the command line / config file parser for this script.

    Creates a ``configargparse.ArgParser``, registers the options for
    output handling, time and memory limits, metalearning, ensembling,
    estimator/preprocessor restriction and miscellaneous settings, and
    then hands the parser to ``populate_argparse_with_data_options`` so
    the data-related options are added as well.

    Returns the parser returned by ``populate_argparse_with_data_options``.
    """
    parser = ArgParser()
    parser.add_argument('-c', '--config',
                        help='Path to a configuration file.',
                        is_config_file=True)

    # Arguments concerning the file output
    parser.add_argument('--output-dir',
                        type=str,
                        default=None,
                        help='AutoSklearn output directory. If not specified, '
                             'a new directory under /tmp/ will be generated.')
    parser.add_argument('--temporary-output-directory',
                        type=str,
                        help='Temporary output directory. If not specified, '
                             'a new directory under /tmp/ will be generated.',
                        default=None)
    # This is a store_true flag: passing it keeps the generated directories.
    # The help text previously claimed the opposite ("setting this to False").
    parser.add_argument('--keep-output',
                        action='store_true',
                        default=False,
                        help='If output_dir and temporary_output_dir are not '
                             'specified, setting this flag will make '
                             'autosklearn not delete these two directories.')

    # Arguments concerning the configuration procedure
    parser.add_argument('--time-limit',
                        type=int,
                        default=3600,
                        help='Total runtime of the AutoSklearn package in '
                             'seconds.')
    parser.add_argument('--per-run-time-limit',
                        type=int,
                        default=360,
                        help='Runtime for a single call of a target algorithm.')
    parser.add_argument('--ml-memory-limit',
                        type=int,
                        default=3000,
                        help='Memory limit for the machine learning pipeline '
                             'being optimized')

    # Arguments concerning the metalearning part
    parser.add_argument('--metalearning-configurations',
                        type=int,
                        default=25,
                        help='Number of configurations which will be used as '
                             'initial challengers for SMAC.')

    # Arguments concerning the algorithm selection part
    parser.add_argument('--ensemble-size',
                        type=int,
                        default=1,
                        help='Maximum number of models in the ensemble. Set '
                             'this to one in order to evaluate the single '
                             'best.')
    parser.add_argument('--ensemble-nbest',
                        type=int,
                        default=1,
                        help='Consider only the best n models in the ensemble '
                             'building process.')

    # Arguments concerning ParamSklearn
    parser.add_argument('--include-estimators',
                        nargs='*',
                        default=None,
                        type=str,
                        help='Only use these estimators inside auto-sklearn')
    parser.add_argument('--include-preprocessors',
                        nargs='*',
                        default=None,
                        type=str,
                        help='Only use these preprocessors inside auto-sklearn')

    # Other
    parser.add_argument('-s', '--seed',
                        type=int,
                        default=1,
                        help='Seed for random number generators.')
    parser.add_argument('--exec-dir', type=str, help='Execution directory.')
    parser.add_argument(
        '--metadata-directory',
        help='DO NOT CHANGE THIS UNLESS YOU KNOW WHAT YOU ARE DOING.'
             '\nReads the metadata from a different directory. '
             'This feature should only be used to perform studies '
             'on the performance of AutoSklearn.')

    # Data-related options are registered by the data manager factory.
    parser = populate_argparse_with_data_options(parser)

    return parser


if __name__ == '__main__':
    # Script entry point: parse the options, prepare the output and
    # temporary directories, start AutoML, wait until shortly before the
    # time limit and then signal every child process before cleaning up.
    parser = get_options()
    args = parser.parse_args()

    time_limit = args.time_limit
    start_time = time.time()

    # Go to the execution directory
    if args.exec_dir is not None:
        os.chdir(args.exec_dir)

    # Check the output directories
    output_dir = args.output_dir
    tmp_dataset_dir = args.temporary_output_directory

    # pid and a random number make the generated /tmp directory names
    # specific to this invocation.
    pid = os.getpid()
    random_number = random.randint(0, 10000)
    remove_output_dir = False
    remove_tmp_dir = False

    if output_dir is None:
        # Generated directories are removed at the end unless --keep-output
        # was given.
        output_dir = '/tmp/autosklearn_output_%d_%d' % (pid, random_number)
        os.makedirs(output_dir)
        remove_output_dir = not args.keep_output
    else:
        if not os.path.isdir(output_dir):
            raise ValueError('If output_dir is specified, it must exist: %s'
                             % output_dir)
        # A user supplied output directory gets one sub-directory per seed;
        # os.mkdir raises if that sub-directory already exists.
        output_dir = os.path.join(output_dir, str(args.seed))
        os.mkdir(output_dir)

    if tmp_dataset_dir is None:
        tmp_dataset_dir = '/tmp/autosklearn_tmp_%d_%d' % (pid, random_number)
        os.makedirs(tmp_dataset_dir)
        remove_tmp_dir = not args.keep_output
    else:
        # A user supplied temporary directory is used as given.
        if not os.path.isdir(tmp_dataset_dir):
            raise ValueError('If tmp_dataset_dir is specified, it must exist: %s'
                             % tmp_dataset_dir)

    # NOTE: BUFFER and DELAY_TO_SIGKILL are not referenced anywhere below;
    # only BUFFER_BEFORE_SENDING_SIGTERM drives the shutdown timing.
    BUFFER = 35  # time-left - BUFFER = timelimit for SMAC/ensemble_script.py
    BUFFER_BEFORE_SENDING_SIGTERM = 30  # We send SIGTERM to all processes
    DELAY_TO_SIGKILL = 15  # And after a delay we send a sigkill

    resampling_strategy, resampling_strategy_arguments = \
        namespace_to_automl_format(args)

    # Subtract the time already spent on setup from the optimization budget.
    time_spent_so_far = time.time() - start_time
    time_left_for_smac = time_limit - time_spent_so_far
    queue = Queue()
    automl = autosklearn.automl.AutoML(
        tmp_dataset_dir,
        output_dir,
        time_left_for_smac,
        args.per_run_time_limit,
        log_dir=tmp_dataset_dir,
        ensemble_size=args.ensemble_size,
        ensemble_nbest=args.ensemble_nbest,
        initial_configurations_via_metalearning=args.metalearning_configurations,
        seed=args.seed,
        ml_memory_limit=args.ml_memory_limit,
        metadata_directory=args.metadata_directory,
        queue=queue,
        keep_models=False,
        debug_mode=False,
        include_estimators=args.include_estimators,
        include_preprocessors=args.include_preprocessors,
        resampling_strategy=resampling_strategy,
        resampling_strategy_arguments=resampling_strategy_arguments)
    automl.start_automl(args)

    # Processes reported through the queue. Stays empty if nothing was
    # received before the deadline or before AutoML failed.
    procs = []

    # == Wait for the three-element message on the queue
    while time.time() - start_time <= time_limit - \
            BUFFER_BEFORE_SENDING_SIGTERM:

        # Stop waiting as soon as AutoML exited with an error.
        if automl.exitcode is not None and automl.exitcode != 0:
            break

        try:
            # Only ``procs`` is used further down.
            [time_needed_to_load_data, data_manager_file, procs] \
                = queue.get_nowait()
            break
        except Exception:
            # Nothing (usable) on the queue yet; poll again in a second.
            # Deliberately not a bare ``except`` so that KeyboardInterrupt
            # and SystemExit still propagate.
            time.sleep(1)

    # == And now we wait till we run out of time
    while time.time() - start_time <= time_limit - \
            BUFFER_BEFORE_SENDING_SIGTERM:
        if automl.exitcode is not None and automl.exitcode != 0:
            break

        time.sleep(1)

    # Kill all children, grand-children and so on
    own_process = psutil.Process()
    # All children which must be killed
    children = deque()
    children.extendleft(own_process.children(recursive=True))

    # Also collect the processes reported through the queue together with
    # their descendants.
    for proc in procs:
        try:
            reported = psutil.Process(proc.pid)
            children.appendleft(reported)
            children.extendleft(reported.children(recursive=True))
        except Exception as e:
            # Best effort: report the problem and continue with the rest.
            print(e)

    # Escalate from SIGINT over SIGTERM to SIGKILL.
    # NOTE(review): the wait at the end of each pass runs until
    # ``time_limit - delay``. With delay == 0 the first pass therefore waits
    # until the full time limit has elapsed, after which SIGTERM and SIGKILL
    # are sent back to back. Confirm whether the deadline was meant to be
    # relative to the start of the kill phase.
    for delay, sig in \
            [(0, signal.SIGINT),
             (BUFFER_BEFORE_SENDING_SIGTERM / 3., signal.SIGTERM),
             (BUFFER_BEFORE_SENDING_SIGTERM / 3. * 2., signal.SIGKILL)]:
        visited = set()

        # Send the current signal to every process still running
        while children:
            child = children.pop()
            if not child.is_running():
                continue

            # First, queue descendants of child which were not signalled in
            # this pass yet
            try:
                for grandchild in child.children(recursive=True):
                    if grandchild not in visited:
                        children.appendleft(grandchild)
            except psutil.NoSuchProcess:
                pass

            # Then, send the signal
            try:
                child.send_signal(sig)
            except psutil.NoSuchProcess:
                pass

            visited.add(child)

        # Readd all children we ever found to the list which were running in
        # the last iteration of killing processes to make sure that we killed
        # them all.
        children.extendleft(visited)

        while time.time() - start_time <= time_limit - delay:
            if automl.exitcode is not None and automl.exitcode != 0:
                break

            time.sleep(1)

    # Don't do this before killing children, because otherwise psutil cannot
    # find them any more
    try:
        automl.terminate()
    except Exception:
        # Best effort: a failing terminate must not prevent the cleanup below.
        pass

    # Only directories generated by this script are removed.
    if remove_output_dir:
        shutil.rmtree(output_dir)
    if remove_tmp_dir:
        shutil.rmtree(tmp_dataset_dir)
