#!/usr/bin/env python

# Core python library
import argparse
import datetime
import logging
import fnmatch
import time
import glob
import os

# Azure-related imports - need blob service and batch service modules
from azure.storage.blob import BlockBlobService
from azure.storage.blob import BlobPermissions
import azure.batch.batch_service_client as batch
import azure.batch.batch_auth as batch_auth
import azure.batch.models as batchmodels
import msrest
# Apparently different credentials (through active directory) are needed in order to create a pool from a custom image.
# Can't find documentation anywhere, but StackOverflow is at least somewhat helpful:
# https://stackoverflow.com/questions/46756780/azure-batch-pool-how-do-i-use-a-custom-vm-image-via-python
from azure.common.credentials import ServicePrincipalCredentials

# TODO: Make some custom errors with better names so that not everything is an AttributeError


def download_container(blob_service, container_name, output_dir):
    """
    Download every blob in a container to a local directory, recreating any
    folder structure encoded in the blob names.
    Modified from https://blogs.msdn.microsoft.com/brijrajsingh/2017/05/27/downloading-a-azure-blob-storage-container-python/
    :param blob_service: Instantiated block_blob_service object (azure.storage.blob.BlockBlobService)
    :param container_name: Name of the container to download.
    :param output_dir: Local directory to place downloaded blobs in.
    """
    generator = blob_service.list_blobs(container_name)
    for blob in generator:
        # Always anchor the local path at output_dir - previously blobs without a folder
        # component were written to the current working directory instead.
        local_path = os.path.join(output_dir, blob.name)
        local_dir = os.path.dirname(local_path)
        # Create any folder structure the blob name implies before downloading into it.
        if local_dir:
            os.makedirs(local_dir, exist_ok=True)
        blob_service.get_blob_to_path(container_name, blob.name, local_path)


class AzureBatch:
    """
    Class for doing azure batch operations to make job submission relatively easy.
    The parse_configuration_file function needs to be run to create one of these objects and set
    attributes, or things really won't work very well (or at all).
    Should also run the checks to make sure that nothing is None (check_no_attributes_none) and that all keys
    for the command, input, and output dictionaries are all present and matching (check_input_output_command_match)
    """
    def __init__(self):
        # Initially, have all these attrs set to None, and set them as needed.
        self.batch_account_name = None
        self.batch_account_key = None
        self.batch_account_url = None
        self.storage_account_name = None
        self.storage_account_key = None
        self.job_name = None
        self.vm_image = None
        self.vm_size = 'Standard_D16s_v3'  # This should be sufficient for essentially anything. User can customize
        # if they really need something bigger (or smaller to save money)
        # Things needed for authentication through active directory, which is apparently necessary.
        self.vm_client_id = None
        self.vm_secret = None
        self.vm_tenant = None
        # Input and output both take the form of dictionaries that contain lists.
        # Keys for the dictionaries are input and output IDs - user can have INPUT_1 and INPUT_2 in the config file,
        # and then we'll get inputs 1 and 2, each having their own files. Same idea for outputs and commands.
        # Any key in input has to be a key in output and command as well, and vice versa (all three ways)
        self.input = dict()
        self.output = dict()
        self.command = dict()
        # Make it so that we can also use things that are already in blob storage.
        # These input file lines should be CLOUDIN:= with _1 or whatever, same as input/output/command.
        # A full proper line would be CLOUDIN:=container_name/ destination_dir
        # This would get everything in container_name and put it into destination_dir, while preserving directory
        # structure if there was any
        self.cloud_input = dict()

    def _login_to_batch(self):
        """
        Uses credentials stored in object to login to Azure batch.
        :return: an instance of batch_client (azure.batch.batch_service_client)
        :raises AttributeError: if the name/key or URL could not be authenticated against.
        """
        credentials = batch_auth.SharedKeyCredentials(self.batch_account_name, self.batch_account_key)
        batch_client = batch.BatchServiceClient(credentials, base_url=self.batch_account_url)
        try:  # Try an operation that will error if the batch client credentials were not correct.
            batch_client.pool.get_all_lifetime_statistics()
        except batchmodels.BatchErrorException:  # This exception occurs if KEY or NAME is incorrect.
            raise AttributeError('Batch client could not be authenticated. Likely cause is your BATCH_ACCOUNT_KEY or'
                                 'BATCH_ACCOUNT_NAME being incorrect.')
        except msrest.exceptions.ClientRequestError:  # This occurs when the provided URL is incorrect.
            raise AttributeError('Batch client could not be authenticated. Check that your BATCH_ACCOUNT_URL is correct.')
        return batch_client

    @staticmethod
    def _create_resource_file(blob_service, file_to_upload, input_container_name, destination_dir=False):
        """
        Given a file on a local machine, creates a resource file that Azure Batch can work with so that the input
        files will be uploaded to Batch service
        :param blob_service: Instantiated block_blob_service object (azure.storage.blob.BlockBlobService)
        :param file_to_upload: Path to file to upload on local machine.
        :param input_container_name: Name of the container to be used to store the input files. Must already have been
        created.
        :param destination_dir: Destination directory on cloud machine that process will run on. If False,
        will be uploaded to root dir.
        :return: An azure batchmodels resource file (azure.batch.models.ResourceFile)
        """
        blob_name = os.path.basename(file_to_upload)
        blob_service.create_blob_from_path(container_name=input_container_name,
                                           blob_name=blob_name,
                                           file_path=file_to_upload)
        # Read-only SAS token so the batch node can pull the blob down; two hours should be
        # plenty of time for the node to start and fetch its inputs.
        sas_token = blob_service.generate_container_shared_access_signature(container_name=input_container_name,
                                                                            permission=BlobPermissions.READ,
                                                                            expiry=datetime.datetime.utcnow() + datetime.timedelta(hours=2))
        sas_url = blob_service.make_blob_url(container_name=input_container_name,
                                             blob_name=blob_name,
                                             sas_token=sas_token)
        if destination_dir:
            return batchmodels.ResourceFile(file_path=os.path.join(destination_dir, os.path.split(file_to_upload)[-1]),
                                            blob_source=sas_url)
        else:
            return batchmodels.ResourceFile(file_path=os.path.split(file_to_upload)[-1],
                                            blob_source=sas_url)

    def prepare_cloud_input_resource_files(self, input_id):
        """
        Creates resource files for input files already in blob storage to be used with batch service.
        :param input_id: Key into self.cloud_input identifying which set of cloud input requests to prepare.
        :return: List of resource files (azure.batch.models.ResourceFile) to be submitted with a task
        """
        resource_files = list()
        blob_service = BlockBlobService(account_key=self.storage_account_key,
                                        account_name=self.storage_account_name)
        for input_request in self.cloud_input[input_id]:
            # Only one thing specified means no destination dir specified
            if len(input_request.split()) == 1:
                container_name = input_request.split('/')[0]
                sas_token = blob_service.generate_container_shared_access_signature(container_name=container_name,
                                                                                    permission=BlobPermissions.READ,
                                                                                    expiry=datetime.datetime.utcnow() + datetime.timedelta(hours=2))
                # Anything after the final slash is treated as a glob pattern; an empty
                # pattern (trailing slash) means everything in the container.
                pattern = os.path.split(input_request)[1]
                if pattern == '':
                    pattern = '*'
                generator = blob_service.list_blobs(container_name=container_name)
                for blob in generator:
                    if fnmatch.fnmatch(blob.name, pattern):
                        sas_url = blob_service.make_blob_url(container_name=container_name,
                                                             blob_name=blob.name,
                                                             sas_token=sas_token)
                        resource = batchmodels.ResourceFile(file_path=blob.name,
                                                            blob_source=sas_url)
                        resource_files.append(resource)
            else:
                # When more than one input is specified, they all have to be from the same container.
                # This way, we only have to list all blobs once, which makes it actually possible
                # to have many requests coming from containers with lots of blobs
                things_to_upload = input_request.split()
                destination_dir = things_to_upload.pop()
                container_name = things_to_upload[0].split('/')[0]
                generator = blob_service.list_blobs(container_name=container_name)
                blobs = list()
                for item in generator:
                    blobs.append(item)
                for item in things_to_upload:
                    # NOTE(review): assumes each item is exactly container_name/pattern - an item
                    # with a nested path would yield a bogus container name here. TODO confirm.
                    container_name = os.path.split(item)[0]
                    sas_token = blob_service.generate_container_shared_access_signature(container_name=container_name,
                                                                                        permission=BlobPermissions.READ,
                                                                                        expiry=datetime.datetime.utcnow() + datetime.timedelta(hours=2))
                    pattern = os.path.split(item)[1]
                    if pattern == '':
                        pattern = '*'
                    for blob in blobs:
                        if fnmatch.fnmatch(blob.name, pattern):
                            sas_url = blob_service.make_blob_url(container_name=container_name,
                                                                 blob_name=blob.name,
                                                                 sas_token=sas_token)
                            resource = batchmodels.ResourceFile(file_path=os.path.join(destination_dir, blob.name),
                                                                blob_source=sas_url)
                            resource_files.append(resource)
        return resource_files

    def upload_input_to_blob_storage(self, input_id):
        """
        Uploads input files to blob storage to be used with batch service.
        :param input_id: Key into self.input identifying which set of input requests to upload.
        :return: List of resource files (azure.batch.models.ResourceFile) to be submitted with a task
        """
        # Instantiate our blob service! Maybe better to only do this once?
        resource_files = list()
        blob_service = BlockBlobService(account_key=self.storage_account_key,
                                        account_name=self.storage_account_name)
        # Create a container for each input request - should be jobname-input, all lower case.
        input_container_name = self.job_name + '-input' + input_id
        blob_service.create_container(container_name=input_container_name)
        for input_request in self.input[input_id]:
            # If input request is only one item, just upload that to default dir on cloud
            if len(input_request.split()) == 1:
                if os.path.isdir(input_request):
                    files_to_upload = recursive_file_list(input_request)
                    last_item_on_path = os.path.split(input_request)[-1]
                    for file_to_upload in files_to_upload:
                        if not input_request.endswith('/'):
                            input_request += '/'
                        # Keep the directory structure below the requested dir intact on the node.
                        upload_to_dir = file_to_upload.split(input_request)[-1]
                        upload_to_dir = os.path.split(upload_to_dir)[0]
                        upload_to_dir = os.path.join(last_item_on_path, upload_to_dir)
                        resource_files.append(self._create_resource_file(blob_service=blob_service,
                                                                         file_to_upload=file_to_upload,
                                                                         input_container_name=input_container_name,
                                                                         destination_dir=upload_to_dir))
                else:
                    # Not a directory - treat it as a glob pattern for individual files.
                    files_to_upload = glob.glob(input_request)
                    for file_to_upload in files_to_upload:
                        resource_files.append(self._create_resource_file(blob_service=blob_service,
                                                                         file_to_upload=file_to_upload,
                                                                         input_container_name=input_container_name))

            # If more than one item, last item is the destination directory on cloud vm that will run analysis.
            if len(input_request.split()) > 1:
                things_to_upload = input_request.split()
                destination_dir = things_to_upload.pop()
                for thing in things_to_upload:
                    if os.path.isdir(thing):
                        files_to_upload = recursive_file_list(thing)
                        last_item_on_path = os.path.split(thing)[-1]
                        for file_to_upload in files_to_upload:
                            if not thing.endswith('/'):
                                thing += '/'
                            upload_to_dir = file_to_upload.split(thing)[-1]
                            upload_to_dir = os.path.split(upload_to_dir)[0]
                            upload_to_dir = os.path.join(last_item_on_path, upload_to_dir)
                            resource_files.append(self._create_resource_file(blob_service=blob_service,
                                                                             file_to_upload=file_to_upload,
                                                                             input_container_name=input_container_name,
                                                                             destination_dir=os.path.join(destination_dir, upload_to_dir)))
                    else:
                        files_to_upload = glob.glob(thing)
                        for file_to_upload in files_to_upload:
                            resource_files.append(self._create_resource_file(blob_service=blob_service,
                                                                             file_to_upload=file_to_upload,
                                                                             input_container_name=input_container_name,
                                                                             destination_dir=destination_dir))
        return resource_files

    def create_pool(self, num_nodes):
        """
        Creates a pool of Ubuntu16.04 machines.
        :param num_nodes: Number of dedicated nodes to provision in the pool.
        """
        # In order to have this work the VM we're running things from needs to have some stuff done to it.
        # First, create a service principal for it:
        # https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-group-create-service-principal-portal#required-permissions
        # And then go to https://docs.microsoft.com/en-us/azure/batch/batch-aad-auth
        # and follow the 'Use Integrated Authentication' to add Batch Service as something for your app to use.
        credentials = ServicePrincipalCredentials(
            client_id=self.vm_client_id,
            secret=self.vm_secret,
            tenant=self.vm_tenant,
            resource='https://batch.core.windows.net/'
        )
        batch_client = batch.BatchServiceClient(credentials, base_url=self.batch_account_url)
        new_pool = batch.models.PoolAddParameter(
            id=self.job_name,
            virtual_machine_configuration=batchmodels.VirtualMachineConfiguration(
                image_reference=batchmodels.ImageReference(
                    virtual_machine_image_id=self.vm_image,
                    ),
                node_agent_sku_id="batch.node.ubuntu 16.04"),
            vm_size=self.vm_size,
            target_dedicated_nodes=num_nodes,
            target_low_priority_nodes=0
        )
        batch_client.pool.add(new_pool)

    def create_job(self):
        """
        Creates a job. Must have a pool created BEFORE you attempt to run this.
        :raises AttributeError: if the pool the job should be attached to does not exist.
        """
        batch_client = self._login_to_batch()
        if batch_client.pool.exists(pool_id=self.job_name):
            job = batch.models.JobAddParameter(id=self.job_name, pool_info=batch.models.PoolInformation(pool_id=self.job_name))
        else:
            raise AttributeError('Job {} was added to a pool that does not exist. Pool must exist before a '
                                 'job can be added to that pool.'.format(self.job_name))
        batch_client.job.add(job)

    def delete_job(self):
        """
        Deletes a batch job. Add a check that the job exists before attempting deletion?
        """
        batch_client = self._login_to_batch()
        batch_client.job.delete(job_id=self.job_name)

    def delete_pool(self):
        """
        Deletes a pool. Raises AttributeError if pool trying to be deleted does not exist.
        """
        batch_client = self._login_to_batch()
        if batch_client.pool.exists(pool_id=self.job_name):
            batch_client.pool.delete(pool_id=self.job_name)
        else:
            raise AttributeError('Pool {} does not exist, and therefore cannot be deleted.'.format(self.job_name))

    def prepare_output_resource_files(self, sas_url, output_id):
        """
        Builds the list of OutputFile specifications for a task, telling Azure Batch which files
        to upload to blob storage after the task runs.
        :param sas_url: Writable SAS URL for the output container.
        :param output_id: Key into self.output identifying which set of output requests to prepare.
        :return: List of azure.batch.models.OutputFile objects.
        """
        output_files = list()
        for output_request in self.output[output_id]:
            for output_item in output_request.split():
                # This means we're getting a directory, so need to go recursive
                if output_item.endswith('/'):
                    # Direct contents of the directory.
                    output_files.append(batchmodels.OutputFile(file_pattern=output_item + '*',
                                                               destination=batchmodels.OutputFileDestination(container=batchmodels.OutputFileBlobContainerDestination(container_url=sas_url,
                                                                                                                                                                      path=os.path.split(output_item)[0])),
                                                               upload_options=batchmodels.OutputFileUploadOptions(
                                                                   upload_condition=batchmodels.OutputFileUploadCondition.task_success
                                                               )))
                    # Plus everything nested below it.
                    # NOTE(review): the .replace('**', '') on the path looks like a no-op for most
                    # inputs since the pattern is appended to file_pattern, not the path - confirm intent.
                    output_files.append(batchmodels.OutputFile(file_pattern=output_item + '/**/*',
                                                               destination=batchmodels.OutputFileDestination(container=batchmodels.OutputFileBlobContainerDestination(container_url=sas_url,
                                                                                                                                                                      path=os.path.split(output_item)[0].replace('**', ''))),
                                                               upload_options=batchmodels.OutputFileUploadOptions(
                                                                   upload_condition=batchmodels.OutputFileUploadCondition.task_success
                                                               )))
                else:
                    output_files.append(batchmodels.OutputFile(file_pattern=output_item,
                                                               destination=batchmodels.OutputFileDestination(container=batchmodels.OutputFileBlobContainerDestination(container_url=sas_url,
                                                                                                                                                                      path=os.path.split(output_item)[0])),
                                                               upload_options=batchmodels.OutputFileUploadOptions(
                                                                   upload_condition=batchmodels.OutputFileUploadCondition.task_success
                                                               )))
        # Also add stdout and stderr.txt log files from the azure container. This is done even if task isn't successful
        output_files.append(batchmodels.OutputFile(file_pattern='../stderr.txt',
                                                   destination=batchmodels.OutputFileDestination(container=batchmodels.OutputFileBlobContainerDestination(container_url=sas_url,
                                                                                                                                                          path=self.job_name + '_' + output_id + '_stderr.txt')),
                                                   upload_options=batchmodels.OutputFileUploadOptions(
                                                       upload_condition=batchmodels.OutputFileUploadCondition.task_completion)
                                                   ))
        output_files.append(batchmodels.OutputFile(file_pattern='../stdout.txt',
                                                   destination=batchmodels.OutputFileDestination(container=batchmodels.OutputFileBlobContainerDestination(container_url=sas_url,
                                                                                                                                                          path=self.job_name + '_' + output_id + '_stdout.txt')),
                                                   upload_options=batchmodels.OutputFileUploadOptions(
                                                       upload_condition=batchmodels.OutputFileUploadCondition.task_completion)
                                                   ))
        return output_files

    def download_output_files_and_delete_container(self, output_dir, output_id):
        """
        Downloads everything from a job's output container to a local dir, then deletes the container.
        :param output_dir: Local directory to download output to.
        :param output_id: Key identifying which output container to download (jobname-outputID).
        """
        output_container = self.job_name + '-output' + output_id
        blob_service = BlockBlobService(account_key=self.storage_account_key,
                                        account_name=self.storage_account_name)
        download_container(blob_service=blob_service,
                           container_name=output_container,
                           output_dir=output_dir)
        blob_service.delete_container(container_name=output_container)

    def delete_input_container(self, input_id):
        """
        Deletes the blob container that input files for the given input ID were staged in.
        :param input_id: Key identifying which input container to delete (jobname-inputID).
        """
        input_container = self.job_name + '-input' + input_id
        blob_service = BlockBlobService(account_key=self.storage_account_key,
                                        account_name=self.storage_account_name)
        blob_service.delete_container(container_name=input_container)

    def create_task(self, input_files, command_id):
        """
        Creates and submits a batch task that runs self.command[command_id], with the given
        resource files staged in and outputs uploaded to a jobname-outputID container.
        :param input_files: List of azure.batch.models.ResourceFile to stage onto the node.
        :param command_id: Key into self.command (and self.output) for this task.
        """
        blob_service = BlockBlobService(account_key=self.storage_account_key,
                                        account_name=self.storage_account_name)
        # Need an output container created.
        output_container_name = self.job_name + '-output' + command_id
        blob_service.create_container(container_name=output_container_name)
        # Write-only SAS so the node can upload outputs; 24 hours to cover long-running tasks.
        sas_token = blob_service.generate_container_shared_access_signature(container_name=output_container_name,
                                                                            permission=BlobPermissions.WRITE,
                                                                            expiry=datetime.datetime.utcnow() + datetime.timedelta(hours=24))
        sas_url = 'https://{}.blob.core.windows.net/{}?{}'.format(self.storage_account_name, output_container_name, sas_token)
        output_files = self.prepare_output_resource_files(sas_url, output_id=command_id)
        batch_client = self._login_to_batch()
        task = batch.models.TaskAddParameter(
            id=self.job_name + command_id,
            command_line="/bin/bash -c \"{}\"".format(self.command[command_id]),
            resource_files=input_files,
            output_files=output_files,
            # Add this in so user doesn't have to type the entirety of the path to conda.
            # Completely unclear on why I can't just modify the $PATH in order to make this work.
            environment_settings=[batchmodels.EnvironmentSetting(name='CONDA', value='/usr/bin/miniconda/bin')]
            )
        batch_client.task.add(job_id=self.job_name, task=task)

    def wait_for_tasks_to_complete(self):
        """
        Blocks until every task associated with the job has reached the completed state,
        polling every 30 seconds.
        """
        # TODO: Add an optional timeout parameter?
        batch_client = self._login_to_batch()
        # Check the status of all tasks associated with the job. Break BEFORE sleeping so we
        # don't waste an extra 30 second wait once everything has already finished.
        while True:
            tasks = batch_client.task.list(self.job_name)
            if all(task.state == batchmodels.TaskState.completed for task in tasks):
                break
            time.sleep(30)

    def check_task_exit_codes(self):
        """
        Looks up the exit code for every task in the job.
        :return: Dict mapping task ID to its exit code.
        """
        exit_codes = dict()
        batch_client = self._login_to_batch()
        tasks = batch_client.task.list(self.job_name)
        for task in tasks:
            # NOTE(review): assumes every task has finished running so execution_info is populated -
            # call wait_for_tasks_to_complete first.
            exit_codes[task.id] = task.execution_info.exit_code
        return exit_codes

    def validate_job_name(self):
        """
        Validates that the job name can double as an Azure blob container name.
        Since we use job name as container name, need to conform to those standards. Job name must therefore be:
        1) Lowercase
        2) contain only letters, numbers, and hyphens (never two hyphens in a row, and not
           starting or ending with a hyphen)
        3) be between 3 and 63 characters.
        :raises AttributeError: if any of these rules are violated.
        """
        if self.job_name.lower() != self.job_name:
            raise AttributeError('Job name must be entirely lower case.')
        # Container names may be as short as 3 characters - the old check (< 4) wrongly rejected them.
        if len(self.job_name) < 3 or len(self.job_name) > 63:
            raise AttributeError('Job name must be between 3 and 63 characters. Your job, {name} has {name_length} '
                                 'characters.'.format(name=self.job_name,
                                                      name_length=len(self.job_name)))
        # Container names also cannot begin with a hyphen (they must start with a letter or number).
        if '--' in self.job_name or self.job_name.endswith('-') or self.job_name.startswith('-'):
            raise AttributeError('Hyphens are allowed in the job name, but you can have no more than one hyphen in '
                                 'a row, and the job name cannot start or end with a hyphen.')
        if self.job_name.replace('-', '').isalnum() is False:
            raise AttributeError('Job names must contain only letters, numbers and hyphens. Special characters are '
                                 'not allowed.')


def sanitize_id(identifier):
    """
    Validate and normalize an input/output/command identifier from the config file
    (e.g. the '1' in INPUT_1).
    :param identifier: The ID portion of an option name.
    :return: The identifier, lower-cased.
    :raises AttributeError: if the identifier contains anything other than letters and numbers.
    """
    if not identifier.isalnum():
        # The message now matches the actual check: isalnum() rejects hyphens too, so don't
        # tell the user that hyphens are allowed. These IDs end up embedded in container
        # names next to our own '-input'/'-output' separators, so keeping them alphanumeric is deliberate.
        raise AttributeError('Input/output/command names must contain only letters and numbers. Special '
                             'characters are not allowed. Your identifier was: {}'.format(identifier))
    return identifier.lower()


def parse_configuration_file(config_file):
    """
    Parse the configuration file a user provides and return an instantiated AzureBatch object.

    Options take the form OPTION:=value, one per line. Blank lines and lines beginning with '#'
    are ignored so the config file can contain comments. Recognized options:
    # Azure-related things - should be able to have these pre-filled for people.
    BATCH_ACCOUNT_NAME, BATCH_ACCOUNT_KEY, BATCH_ACCOUNT_URL
    STORAGE_ACCOUNT_NAME, STORAGE_ACCOUNT_KEY
    # This will be very necessary
    JOB_NAME
    # Allow multiple input files - each one can be a unix-y mv, with the last arg being a folder to place the files
    # in on cloud VM.
    INPUT (optionally INPUT_someid to group related lines)
    # Inputs already in blob storage, e.g. CLOUDIN:=container_name/ destination_dir
    CLOUDIN
    # Also allow multiple output files - each will get uploaded to blob storage, and optionally downloaded from blob
    # storage to user's computer.
    OUTPUT
    # The command to run on cloud.
    COMMAND
    # The URL for the VM image user wants to run, plus optional VM size/auth settings.
    VM_IMAGE, VM_SIZE, VM_CLIENT_ID, VM_SECRET, VM_TENANT

    :param config_file: Path to the configuration file to parse.
    :return: AzureBatch object with attributes set from the file.
    :raises AttributeError: if any option in the file was not recognized.
    """
    # Options that map directly onto a single AzureBatch attribute.
    simple_options = {
        'BATCH_ACCOUNT_NAME': 'batch_account_name',
        'BATCH_ACCOUNT_KEY': 'batch_account_key',
        'BATCH_ACCOUNT_URL': 'batch_account_url',
        'STORAGE_ACCOUNT_NAME': 'storage_account_name',
        'STORAGE_ACCOUNT_KEY': 'storage_account_key',
        'JOB_NAME': 'job_name',
        'VM_IMAGE': 'vm_image',
        'VM_SIZE': 'vm_size',
        'VM_CLIENT_ID': 'vm_client_id',
        'VM_SECRET': 'vm_secret',
        'VM_TENANT': 'vm_tenant',
    }
    azurebatch = AzureBatch()
    unrecognized_options = list()

    # Go through the input file and parse through all the things.
    # If user has specified any options that are not part of our set of recognized options, boot them out with a message
    with open(config_file) as f:
        for config_option in f:
            config_option = config_option.rstrip()
            # Skip blank and comment lines - previously these crashed the ':=' split with IndexError.
            if config_option == '' or config_option.startswith('#'):
                continue
            if ':=' not in config_option:
                unrecognized_options.append(config_option)
                continue
            # maxsplit=1 so a parameter that itself contains ':=' (e.g. inside a COMMAND) survives intact.
            option, parameter = config_option.split(':=', 1)
            if option in simple_options:
                setattr(azurebatch, simple_options[option], parameter)
            elif 'COMMAND' in option:
                azurebatch.command[_option_id(option)] = parameter
            elif 'INPUT' in option:
                azurebatch.input.setdefault(_option_id(option), []).append(parameter)
            elif 'CLOUDIN' in option:
                azurebatch.cloud_input.setdefault(_option_id(option), []).append(parameter)
            elif 'OUTPUT' in option:
                azurebatch.output.setdefault(_option_id(option), []).append(parameter)
            else:
                unrecognized_options.append(option)

    # Check that no options were submitted that were not recognized.
    if len(unrecognized_options) > 0:
        raise AttributeError('The following options were specified in configuration file {config_file},'
                             ' but not recognized: {options}'.format(options=unrecognized_options,
                                                                     config_file=config_file))

    return azurebatch


def _option_id(option):
    # Extract the sanitized ID suffix from an option name: 'INPUT_1' -> '1'.
    # No underscore suffix means the default, anonymous '' ID.
    if len(option.split('_')) == 2:
        return sanitize_id(option.split('_')[1])
    return ''


def recursive_file_list(directory):
    """Return a list of all files (at any depth) under directory.

    :param directory: Path to the directory to search.
    :return: List of paths to files; directories themselves are excluded.
    """
    # glob with '**' and recursive=True yields files AND directories;
    # filter out the directories in a single pass instead of building the
    # full list and calling list.remove() per directory (which was O(n^2)).
    return [item for item in glob.glob(os.path.join(directory, '**'), recursive=True)
            if not os.path.isdir(item)]


def check_no_attributes_none(azurebatch_object):
    """Verify every attribute of azurebatch_object was set by the configuration file.

    An attribute counts as missing if it is None or an empty list (list-valued
    options accumulate entries, so an empty list means the option never appeared).

    :param azurebatch_object: Object whose instance attributes mirror config options.
    :raises AttributeError: Listing the missing option names (upper-cased to match
        the option spelling used in the configuration file).
    """
    missing_attributes = [attr.upper() for attr, value in vars(azurebatch_object).items()
                          if value is None or (isinstance(value, list) and not value)]
    if missing_attributes:
        raise AttributeError('The following options are required, but were not found in your '
                             'configuration file: {}'.format(missing_attributes))


def check_input_output_command_match(azurebatch_object):
    """Check that input, output, and command IDs all cross-reference each other.

    Every input ID needs a matching output and command ID; every output/command
    ID needs a matching (local or cloud) input ID. Outputs and commands accept a
    cloud_input ID in place of a local input ID.

    :param azurebatch_object: Object with dict attributes input, cloud_input,
        output, and command, each keyed by ID string.
    :raises AttributeError: Naming the ID that has no counterpart.
    """
    # BUGFIX: the original messages contained '{}' placeholders but never
    # called .format(), so the offending ID was never shown to the user.
    # Check input has corresponding outputs and commands.
    for input_id in azurebatch_object.input:
        if input_id not in azurebatch_object.output:
            raise AttributeError('Input ID {} is present, but there is no corresponding output ID.'.format(input_id))
        if input_id not in azurebatch_object.command:
            raise AttributeError('Input ID {} is present, but there is no corresponding command ID.'.format(input_id))
    # Check output has corresponding inputs and commands.
    for output_id in azurebatch_object.output:
        if output_id not in azurebatch_object.input and output_id not in azurebatch_object.cloud_input:
            raise AttributeError('Output ID {} is present, but there is no corresponding input ID.'.format(output_id))
        if output_id not in azurebatch_object.command:
            raise AttributeError('Output ID {} is present, but there is no corresponding command ID.'.format(output_id))
    # Check commands have corresponding inputs and outputs.
    for command_id in azurebatch_object.command:
        if command_id not in azurebatch_object.input and command_id not in azurebatch_object.cloud_input:
            raise AttributeError('Command ID {} is present, but there is no corresponding input ID.'.format(command_id))
        if command_id not in azurebatch_object.output:
            raise AttributeError('Command ID {} is present, but there is no corresponding output ID.'.format(command_id))


if __name__ == '__main__':
    # Entry point: parse config, create an Azure Batch pool, upload inputs, run
    # tasks, download outputs, and clean up cloud resources.
    logging.basicConfig(format='\033[92m \033[1m %(asctime)s \033[0m %(message)s ',
                        level=logging.INFO,
                        datefmt='%Y-%m-%d %H:%M:%S')
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--configuration_file',
                        type=str,
                        required=True,
                        help='Path to your configuration file.')
    parser.add_argument('-d', '--download_output_files',
                        default=True,
                        action='store_false',
                        help='By default, output files will be downloaded from blob storage to local machine '
                             'and the blob files deleted. Activate this to not download files and keep them in '
                             'blob storage.')
    parser.add_argument('-k', '--keep_input_container',
                        default=False,
                        action='store_true',
                        help='By default, input container is deleted. Activate this to not delete the input container')
    parser.add_argument('-o', '--output_dir',
                        type=str,
                        default=os.getcwd(),
                        help='Directory where you want your output files stored. Defaults to your current working '
                             'directory.')
    parser.add_argument('-e', '--exit_code_file',
                        type=str,
                        required=False,
                        help='If specified, will create a file that stores the exit code for each task.')
    args = parser.parse_args()

    logging.info('Reading in configuration file {}...'.format(args.configuration_file))
    azurebatch = parse_configuration_file(args.configuration_file)
    check_no_attributes_none(azurebatch)
    azurebatch.validate_job_name()
    check_input_output_command_match(azurebatch)
    logging.info('Configuration file validated. Creating pool...')
    # Create pool before uploading files - this way, the pool should (hopefully) be created by the time file
    # upload finishes, depending on how large the input files are.
    azurebatch.create_pool(num_nodes=len(azurebatch.command))
    logging.info('Uploading files...')
    # TODO: Add progress bar for each file for more user feedback?
    resource_files = dict()
    try:
        for input_id in azurebatch.input:
            resource_files[input_id] = azurebatch.upload_input_to_blob_storage(input_id=input_id)
    # Catch Exception rather than a bare except so KeyboardInterrupt/SystemExit
    # still propagate.
    # TODO: Double check the exception that comes up on upload timeout and make more specific.
    except Exception:
        logging.error('ERROR: Upload of local files to cloud failed. Please try again.')
        azurebatch.delete_pool()
        if args.exit_code_file:  # Update exit code file with a non-zero code if upload failed.
            with open(args.exit_code_file, 'a+') as f:
                for command_id in azurebatch.command:
                    f.write('{},{}\n'.format(azurebatch.job_name + command_id, '1'))
        # SystemExit instead of quit(): quit() is an interactive helper added by
        # the site module and is not guaranteed to exist in scripts (e.g. with -S).
        raise SystemExit(1)

    # Also use cloud files already in blob storage as input, if specified.
    for input_id in azurebatch.cloud_input:
        cloud_resource = azurebatch.prepare_cloud_input_resource_files(input_id=input_id)
        for resource in cloud_resource:
            if input_id in resource_files:
                resource_files[input_id].append(resource)
            else:
                resource_files[input_id] = [resource]

    logging.info('Running tasks...')
    azurebatch.create_job()
    for command_id in azurebatch.command:
        azurebatch.create_task(input_files=resource_files[command_id], command_id=command_id)
    # With tasks submitted, wait for all to reach a 'completed' state.
    azurebatch.wait_for_tasks_to_complete()
    task_exit_codes = azurebatch.check_task_exit_codes()
    # Clean up resources so that we don't spend exorbitant amounts of money.
    logging.info('Tasks complete! Cleaning up pool...')
    azurebatch.delete_job()
    azurebatch.delete_pool()
    if args.download_output_files:
        # Log only when downloads were actually requested (previously this was
        # logged unconditionally, even with -d set).
        logging.info('Downloading output files...')
        for output_id in azurebatch.output:
            azurebatch.download_output_files_and_delete_container(output_dir=args.output_dir, output_id=output_id)
    if args.keep_input_container is False:
        for input_id in azurebatch.input:
            azurebatch.delete_input_container(input_id=input_id)

    # Record exit codes if requested - open the file once instead of reopening
    # it for every task as before.
    if args.exit_code_file:
        with open(args.exit_code_file, 'a+') as f:
            for task_id in task_exit_codes:
                f.write('{},{}\n'.format(task_id, task_exit_codes[task_id]))
    # Warn user about any tasks that did not have a 0 (success) exit code.
    for task_id in task_exit_codes:
        if task_exit_codes[task_id] != 0:
            logging.error('ERROR: Task {} did not complete successfully (exit code {}). See the stderr and stdout '
                          'files of that task for more information.'.format(task_id, task_exit_codes[task_id]))
    logging.info('Complete!')
