#! /bin/bash

# Primary command script passed into Batch container runs

# caldp-process  ipppssoot  <input_path>  <output_path>  <caldp_config>

# Positional parameters.  The dataset name (ipppssoot) has no default; the
# input and output paths default to "file:." and the configuration name
# defaults to "caldp-config-onsite".
ipppssoot="$1"
input_path="${2:-file:.}"
output_path="${3:-file:.}"
caldp_config="${4:-caldp-config-onsite}"

# DEV_HOME falls back to the current directory when it is unset or empty.
[[ -n "${DEV_HOME}" ]] || export DEV_HOME=.

#######################################
# Fallback used when GNU time is not available.
#
# The script invokes "$TIME --verbose -o <metrics_file> <command...>".  This
# wrapper accepts the same leading options, writes a placeholder into the
# metrics file (later steps cat and copy that file), and then runs the
# command unmodified.
# Arguments: [--verbose|-v] [-o metrics_file] command [args...]
# Returns:   the exit status of the command
#######################################
caldp_run_untimed() {
    local metrics_file=""
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --verbose | -v)
                shift
                ;;
            -o)
                metrics_file="${2:-}"
                shift
                # Guard against a dangling "-o" so the loop always terminates.
                if [[ $# -gt 0 ]]; then
                    shift
                fi
                ;;
            *)
                break
                ;;
        esac
    done
    if [[ -n "${metrics_file}" ]]; then
        echo "Process metrics unavailable: GNU time was not found on this platform." > "${metrics_file}"
    fi
    "$@"
}

# Configure TIME to point to Gnu's time program which can collect process metrics.
# The previous fallback, TIME=eval, could not work: bash's eval rejects the
# "--verbose" option that every call site passes, so the wrapped command never ran.
os_name="$(uname)"
if [[ "${os_name}" == "Darwin" && -f /usr/local/bin/gtime ]]; then  # brew install gnu-time
    TIME=/usr/local/bin/gtime
elif [[ "${os_name}" == "Linux" ]]; then
    TIME=/usr/bin/time
else
    TIME=caldp_run_untimed
fi

echo "Starting caldp-process at (input) ${input_path} (output) ${output_path} for ${ipppssoot} using config ${caldp_config}"

# Load the selected configuration into this shell.  The name is quoted so it is
# passed to `source` as a single word (no word-splitting or globbing).  A bare
# name without a slash is looked up on PATH by bash.  A failure to load the
# configuration is not checked here; the script continues regardless.
source "${caldp_config}"

# When CALDP_DOCKER is non-empty, CRDS_PATH is forced to a fixed location.
# The cache may or may not be writeable, and may or may not be dynamic.
# NOTE(review): the previous comment stated that inside Docker the CRDS cache is
# *always* at $HOME/crds_cache, which does not match the path exported below.
# Confirm which of the two is intended.
if [[ -n "${CALDP_DOCKER}" ]]; then
    export CRDS_PATH=/grp/crds/cache
fi

# Export the per-instrument reference directory variables (iref, jref, oref,
# lref, nref, uref, uref_linux) relative to CRDS_PATH.
#
# A readonly cache (CRDS_READONLY_CACHE == "1") is assumed to be "flat": every
# variable points at the same references/hst/ directory.  This may not work for
# all CRDS caches unless they have been re-organized with the CRDS sync tool to
# be flat as well as readonly; most existing HST readonly pipeline caches should
# already be flat.  Newly created caches are organized by "instrument" unless
# reorganized, which is the offsite / read-write case handled by the else branch.
hst_refs="${CRDS_PATH}/references/hst"
if [[ "${CRDS_READONLY_CACHE}" == "1" ]]; then
    for ref_var in iref jref oref lref nref uref; do
        export "${ref_var}=${hst_refs}/"
    done
else
    export iref="${hst_refs}/wfc3/"
    export jref="${hst_refs}/acs/"
    export oref="${hst_refs}/stis/"
    export lref="${hst_refs}/cos/"
    export nref="${hst_refs}/nicmos/"
    # NOTE(review): "wfcpc2" looks like it may be a typo for "wfpc2" -- confirm
    # against the actual cache directory layout before changing it.
    export uref="${hst_refs}/wfcpc2/"
fi
# uref_linux mirrors uref in both layouts.
export uref_linux="${uref}"

#######################################
# Print the text before the first ':' of a path such as "s3://bucket/key" or
# "file:.".  A value without ':' (e.g. "none") is printed unchanged.
# Arguments: $1 - path
#######################################
caldp_uri_scheme() {
    printf '%s\n' "${1%%:*}"
}

#######################################
# Print the bucket name of a path of the form "s3://bucket[/key...]".
# Arguments: $1 - S3 path
#######################################
caldp_s3_bucket() {
    local without_scheme="${1#*://}"
    printf '%s\n' "${without_scheme%%/*}"
}

echo ........................................ Environment ..............................................
echo "User is $(whoami)"
echo "Current dir is $(pwd)"
printenv | sort
ls -ld .

output_location="$(caldp_uri_scheme "${output_path}")"

# Verify up front that the output bucket can be listed, so an access problem
# stops the run before any processing is done.  Variables are quoted and the
# test uses [[ ]] so that an unusual path cannot make the test itself error out
# and silently skip the check.
if [[ "${output_path}" != "none" && "${output_location}" == "s3" ]]; then
    echo ........................................ S3 read check  ..............................................
    bucket="$(caldp_s3_bucket "${output_path}")"
    if ! aws s3 ls "${bucket}"; then
        echo "XXXXXX  S3 read check failed at ${output_path}."
        exit 1
    fi
fi

echo ........................................ processing log ..............................................
pwd
# Run the processing step under the TIME wrapper configured earlier in this
# script; metrics go to process_metrics.txt and the combined stdout/stderr is
# mirrored into process.txt.  pipefail makes $? reflect a failure of the
# processing command rather than the (normally successful) tee.
# All path arguments are quoted so each is passed as exactly one argument.
set -o pipefail
"${TIME}" --verbose -o process_metrics.txt \
    python -m caldp.process "${input_path}" "${output_path}/data" "${ipppssoot}" \
    2>&1 | tee process.txt
process_exit_status=$?
echo "Processing exit status ${process_exit_status}"

# Ask the caldp-get-output-path helper where this dataset's outputs live.
processing_s3_output_dir="$(caldp-get-output-path "${output_path}/data" "${ipppssoot}")"
echo "Processing s3_output_dir is ${processing_s3_output_dir}"

logs_s3_output_dir="${processing_s3_output_dir}/logs"
echo "Logs s3_output_dir is ${logs_s3_output_dir}"

# With no output location requested, previews are written to the current directory.
if [[ "${output_path}" == "none" ]]; then
    previews_s3_output_dir="file:."
else
    previews_s3_output_dir="${processing_s3_output_dir}/previews"
fi
echo "Previews s3_output_dir is ${previews_s3_output_dir}"

echo ........................................ previews log ................................................
# When the input path mentions "astroquery", previews read from the current
# directory instead of from input_path.
# NOTE(review): presumably because astroquery inputs are downloaded into the
# working directory by the processing step -- confirm.
if [[ "${input_path}" == *astroquery* ]]; then
    previews_input_dir="file:."
else
    previews_input_dir="${input_path}"
fi
# Same pattern as the processing step: metrics go to preview_metrics.txt, the
# combined output is mirrored into preview.txt, and pipefail makes $? reflect
# the previews command rather than tee.  Arguments are quoted so each path is
# passed as exactly one argument.
set -o pipefail
"${TIME}" --verbose -o preview_metrics.txt \
    python -m caldp.create_previews "${previews_input_dir}" "${previews_s3_output_dir}" \
    2>&1 | tee preview.txt
preview_exit_status=$?
echo "Preview exit status ${preview_exit_status}"

# Echo the collected metrics files into the job output, each under its own banner.
printf '%s\n' "........................................ process metrics ............................................."
cat process_metrics.txt

printf '%s\n' "........................................ preview metrics ............................................."
cat preview_metrics.txt

#######################################
# Print the message type for a run: "dataset-processed" when both exit
# statuses are zero, "dataset-error" otherwise.
# Arguments: $1 - processing exit status
#            $2 - preview exit status
#######################################
caldp_message_type() {
    if [[ "${1:-0}" -eq 0 && "${2:-0}" -eq 0 ]]; then
        echo "dataset-processed"
    else
        echo "dataset-error"
    fi
}

#######################################
# Print "${DEV_HOME}/<path>" for every regular file under a directory whose
# path matches a pattern.  Lines are read with IFS= and -r so file names
# containing backslashes or leading/trailing blanks are reported unchanged.
# Globals:   DEV_HOME (read)
# Arguments: $1 - directory to search
#            $2 - grep pattern the path must match
#######################################
caldp_list_output_files() {
    local data_dir="$1" name_filter="$2" found
    find "${data_dir}" -mindepth 1 -type f \
        | grep -- "${name_filter}" \
        | while IFS= read -r found; do
            printf '%s\n' "${DEV_HOME}/${found}"
        done
}

echo ........................................ handling outputs for ${output_location} ............................................
# Note: this action is not logged since log files are being transferred.
if [[ "${output_path}" != "none" ]]; then
    message_type="$(caldp_message_type "${process_exit_status}" "${preview_exit_status}")"
    # Processing log + metrics, then preview log + metrics.
    log_files=(process.txt process_metrics.txt preview.txt preview_metrics.txt)

    case "${output_location}" in
        s3)
            for log_file in "${log_files[@]}"; do
                aws s3 cp --quiet "${log_file}" "${logs_s3_output_dir}/"
            done

            # The message is a local file named after the dataset, uploaded
            # under messages/<message_type>/.
            echo "Messaging ${message_type}"
            echo "${message_type} ${ipppssoot}" > "${ipppssoot}"
            aws s3 cp --quiet "${ipppssoot}" "${output_path}/messages/${message_type}/${ipppssoot}"
            ;;
        file)
            # Text between the first and second ':' of output_path.
            stripped_output="$(printf '%s\n' "${output_path}" | cut -d':' -f2)"
            messages_output_dir="${stripped_output}/messages"
            echo "Messages output dir is ${messages_output_dir}"

            mkdir -p "${logs_s3_output_dir}"
            for log_file in "${log_files[@]}"; do
                cp "${log_file}" "${logs_s3_output_dir}/"
            done

            echo "Messaging ${message_type}"
            echo "${message_type} ${ipppssoot}" > "${ipppssoot}"
            # Append the DEV_HOME-prefixed paths of the output files whose path
            # contains the first six characters of the dataset name.
            caldp_list_output_files "${stripped_output}/data" "${ipppssoot:0:6}" >> "${ipppssoot}"

            mkdir -p "${messages_output_dir}/${message_type}"
            cp "${ipppssoot}" "${messages_output_dir}/${message_type}/${ipppssoot}"
            # Successful runs also get a copy of the message under dataset-synced.
            if [[ "${message_type}" == "dataset-processed" ]]; then
                mkdir -p "${messages_output_dir}/dataset-synced"
                cp "${ipppssoot}" "${messages_output_dir}/dataset-synced"
            fi
            ;;
    esac
fi

# The script as a whole fails (exit 1) unless both the processing and the
# preview steps finished with status 0.
if ! [[ $process_exit_status -eq 0 && $preview_exit_status -eq 0 ]]; then
    exit 1
fi
