#!/bin/bash

# A BATCH file that takes a FASTA file and creates a contigs database from it.

set -e

# Print $1 on its own line wrapped in ANSI escape sequences
# (ESC[1;30m and ESC[47m before the text, ESC[0m after it).
# Backslash escapes inside $1 are interpreted too, because the whole
# string is handed to `echo -e`.
C() {
    local text=$1
    local style_on="\033[1;30m\033[47m"
    local style_off="\033[0m"

    echo -e "${style_on}${text}${style_off}"
}

# Print a progress banner: two empty lines, then ":: <message> ..."
# rendered through C, then one more empty line.
#   $1 - the message to show
INFO() {
    local banner=":: $1 ..."

    printf '\n\n'
    C "$banner"
    printf '\n'
}

# Print an absolute form of the path given as $1.
#   - A path that already starts with '/' is printed unchanged.
#   - Any other path is printed relative to the current working directory,
#     with a single leading "./" removed.
# The path is not resolved against the file system (no symlink or ".."
# handling) and does not have to exist.
ABSPATH() {
    local path=$1

    if [[ $path == /* ]]; then
        # printf instead of echo, so names such as "-n" are printed verbatim.
        printf '%s\n' "$path"
    else
        # ${PWD%/} drops the trailing slash when the working directory is "/",
        # which would otherwise produce "//name".
        printf '%s/%s\n' "${PWD%/}" "${path#./}"
    fi
}


# Validate the command line: exactly one argument is expected, and it must
# name an existing regular file whose name ends in '.fa'. On success FASTA
# holds the absolute path of that file. Diagnostics go to stderr and every
# failure exits with status 1.
if [[ $# -ne 1 ]]; then
    echo "Error: You are supposed to give a single FASTA file path as a parameter." >&2
    exit 1
fi

# Test the final suffix of the file name (directory part stripped) instead of
# its second dot-separated field: 'my.sample.fa' is accepted, while
# 'reads.fa.gz' and an extension-less file called 'fa' are rejected.
if [[ ${1##*/} != ?*.fa ]]; then
    echo "Error: File name has to have an extension of '.fa'" >&2
    exit 1
fi

FASTA=$(ABSPATH "$1")

if [[ ! -f $FASTA ]]; then
    echo "Error: File not found :(" >&2
    exit 1
fi

# Derive every output path from the input FASTA: all artifacts are written
# into the directory of the input file and share its name minus the final
# extension.
INPUT_DIR=${FASTA%/*}
BASE=${FASTA##*/}
FILENAME=${BASE%.*}
MAP="$INPUT_DIR/$FILENAME-MAP.txt"
TEMP_FASTA="$INPUT_DIR/$FILENAME-clean.fa"
CONTIGS_DB="$INPUT_DIR/$FILENAME.db"

# Remove the intermediate FASTA on every exit path, including the case where
# one of the commands below fails and the script stops early.
trap 'rm -f -- "$TEMP_FASTA"' EXIT

INFO "INPUT DIR: $INPUT_DIR, FNAME: $FILENAME"

# All paths are quoted so directories or file names containing spaces or
# glob characters are passed to the tools as single arguments.
INFO "RENAMING CONTIGS"
anvi-script-reformat-fasta "$FASTA" --simplify-names -o "$TEMP_FASTA" -r "$MAP"

INFO "GENERATING THE CONTIGS DB"
anvi-gen-contigs-database -f "$TEMP_FASTA" -o "$CONTIGS_DB" --skip-mindful-splitting

# remove the temporary file with clean names; it is no longer needed once
# the contigs database has been generated
rm -f -- "$TEMP_FASTA"

INFO "RUNNING HMMs"
anvi-run-hmms -c "$CONTIGS_DB"
