#!/bin/sh

# Working directories: fasta_inputs/ holds the file list and the decompressed
# assemblies, logs/ receives the SLURM array logs.
mkdir -p fasta_inputs logs

# Write the path of every compressed assembly to a list file, one per line.
# The unzip job array later picks its input by line number from this list.
ASSEMBLY_LIST=fasta_inputs/assembly_file_list.txt
find ../../*ANALYSIS*ASSEMBLY/*-assembly/unicycler/*.fasta.gz > "$ASSEMBLY_LIST"

# Get the number of files (used as the upper bound of the job array).
# The arithmetic expansion normalises the count, because some wc
# implementations pad it with blanks.
num_files=$(wc -l < "$ASSEMBLY_LIST")
num_files=$((${num_files:-0}))

# Stop early when no assembly was found: an array range of 1-0 describes no
# task, and the emmtyper step would have nothing to type.
if [ "$num_files" -lt 1 ]; then
    echo "ERROR: no *.fasta.gz assemblies found under ../../*ANALYSIS*ASSEMBLY/*-assembly/unicycler/" >&2
    exit 1
fi

# Current directory with the /data/ucct/bi/scratch_tmp prefix rewritten to
# /scratch; the generated jobs use it as their --chdir.
scratch_dir=$(printf '%s\n' "$PWD" | sed "s/\/data\/ucct\/bi\/scratch_tmp/\/scratch/g")

# STEP 1: Set up jobarray to unzip fasta files
# Each array task decompresses the assembly found on line SLURM_ARRAY_TASK_ID
# of the file list into fasta_inputs/. The heredoc delimiter is unquoted, so
# plain variables are filled in now, while the backslash-escaped ones are left
# for the job itself to expand at run time.
cat <<EOF > _00_unzip_jobarray.sbatch
#!/bin/bash
#SBATCH --job-name=unzip_fasta
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=2
#SBATCH --mem=8G
#SBATCH --time=2:00:00
#SBATCH --partition short_idx
#SBATCH --array=1-$num_files
#SBATCH --chdir $scratch_dir
#SBATCH --output logs/slurm-%A_%a.out
#SBATCH --error logs/slurm-%A_%a.err

# Get the file to process
file=\$(sed -n "\${SLURM_ARRAY_TASK_ID}p" "$ASSEMBLY_LIST")

# Unzip the file to the destination directory
# (-c streams to stdout and leaves the .gz in place, so -k is not needed)
gzip -dc "\$file" > "fasta_inputs/\$(basename "\$file" .gz)"

EOF

# STEP 2: Set up the batch file that runs emmtyper on the unzipped assemblies.
# The heredoc delimiter is unquoted: the scratch directory and the date in the
# log name are filled in now, when this file is generated. Escaped dollars are
# expanded by the job at run time, and every line continuation of the
# singularity command must be written as a double backslash so that a single
# backslash reaches the generated file.
cat <<EOF > _01_emmtyper.sbatch
#!/bin/bash
#SBATCH --job-name emmtyper
#SBATCH --ntasks 1
#SBATCH --cpus-per-task 4
#SBATCH --mem 24G
#SBATCH --time 4:00:00
#SBATCH --partition short_idx
#SBATCH --chdir $scratch_dir
#SBATCH --output ./$(date '+%Y%m%d')_emmtyper.log

# module load singularity

# create results folder
mkdir -p 01-typing
mkdir -p 01-typing/tmps
blastdb_path=/data/ucct/bi/references/cdc_emm_blastdb/20240509

# Run emmtyper
singularity exec \\
    --bind "${scratch_dir}" \\
    --bind "${scratch_dir}/../../" \\
    --bind "\$blastdb_path" \\
    /data/ucct/bi/pipelines/singularity-images/singularity-emmtyper.0.2.0--py_0 emmtyper \\
    -w blast \\
    --keep \\
    --blast_db "\${blastdb_path}/cdc_emm_database" \\
    --percent-identity 100 \\
    --culling-limit 5 \\
    --output 01-typing/results_emmtyper.out \\
    --output-format verbose \\
    ./fasta_inputs/*.fasta

mv *.tmp 01-typing/tmps

EOF

# Driver script: submits the unzip job array, then queues the emmtyper job
# with an afterok dependency on it. The heredoc delimiter is quoted, so the
# text below is written out verbatim with nothing expanded here.
cat <<'EOF' > _ALLSTEPS_emmtyper.sh
#!/bin/bash
# # module load singularity
unzip_job_id=$(sbatch _00_unzip_jobarray.sbatch | awk '{print $4}')
sbatch --dependency=afterok:${unzip_job_id} _01_emmtyper.sbatch
EOF

# Make the driver directly executable
chmod +x _ALLSTEPS_emmtyper.sh
