# module load singularity

# Directory that receives the per-job srun logs. -p keeps a re-run of this
# lablog from failing when the directory already exists.
mkdir -p logs

# Current working directory with every /data/ucct/bi/scratch_tmp occurrence
# rewritten to /scratch. $PWD is quoted so that unusual characters in the path
# are passed through untouched.
scratch_dir=$(printf '%s\n' "$PWD" | sed 's|/data/ucct/bi/scratch_tmp|/scratch|g')

# Path (relative to this directory) of the directory named 'ref_db' found under
# REFERENCES; it is appended to ${scratch_dir} to form the pubmlst reference
# passed to `ariba run`.
downloaded_ref=$(find ../../../../REFERENCES/ -type d -name 'ref_db')

# The generated pubmlst commands embed ${downloaded_ref} verbatim, so an empty
# or multi-line value would yield broken command lines. Warn instead of
# aborting so the non-pubmlst commands can still be generated.
if [[ -z "$downloaded_ref" ]]; then
  printf 'WARNING: no ref_db directory found under ../../../../REFERENCES/\n' >&2
elif [[ "$downloaded_ref" == *$'\n'* ]]; then
  printf 'WARNING: several ref_db directories found under ../../../../REFERENCES/:\n%s\n' "$downloaded_ref" >&2
fi

# Build the Cartesian product of the sample list and the database list in one
# pass, so no nested loop is needed. Both files are joined on their second
# field; presumably each file holds a single token per line, which makes that
# field empty everywhere and lets every line pair with every other one
# (TODO confirm the one-column layout). The join key is printed first, which
# leaves a leading blank on each output line; sed strips it.
join -1 2 -2 2 ../../samples_id.txt ../databases.txt \
  | sed 's/^ //' \
  > sample_database.txt

# sample_database.txt layout (whitespace separated):
#   col 1: sample
#   col 2: database

#######################################
# Print one backgrounded `srun ... singularity exec ... ariba run` command line
# for every "sample database" pair read from stdin.
# Lines that lack either column are skipped so that no malformed command is
# emitted; fields are read with `read -r`, so they are neither glob-expanded
# nor backslash-processed.
# Globals:   scratch_dir (read)
# Arguments: none
# Outputs:   one shell command line per valid input line, on stdout
#######################################
emit_ariba_run_cmds() {
  local IFS=$' \t\n'
  local ariba_image='/data/ucct/bi/pipelines/singularity-images/ariba:2.14.6--py39heaaa4ec_6'
  local ariba_refs='/data/ucct/bi/references/ariba'
  local sample database _ job reads
  local -a cmd

  while read -r sample database _; do
    [[ -n "$sample" && -n "$database" ]] || continue

    job="ARIBA_${sample}_${database}"
    # Common prefix of the R1/R2 filtered FASTQ files of this sample.
    reads="${scratch_dir}/../../01-preprocessing/${sample}/${sample}"

    cmd=(
      srun
      --chdir "$scratch_dir"
      --output "logs/${job}.%j.log"
      --job-name "$job"
      --cpus-per-task 5
      --mem 5G
      --partition short_idx
      --time 02:00:00
      singularity exec
      -B "${scratch_dir}/../../../../"
      -B "${ariba_refs}/"
      "$ariba_image"
      ariba run
      "${ariba_refs}/latest/${database}/out.${database}.prepareref"
      "${reads}_R1_filtered.fastq.gz"
      "${reads}_R2_filtered.fastq.gz"
      "${scratch_dir}/${sample}/out_${database}_${sample}_run"
    )

    # The words are joined with single spaces; the trailing '&' makes the
    # generated script launch every job in the background.
    printf '%s\n' "mkdir -p ${sample}; ${cmd[*]} &"
  done
}

# Lines mentioning pubmlst are filtered out here; this (re)creates _01_ariba.sh.
grep -v 'pubmlst' sample_database.txt | emit_ariba_run_cmds > _01_ariba.sh

#######################################
# Print one backgrounded `srun ... singularity exec ... ariba run` command line
# per sample id read from stdin, using ${scratch_dir}/${downloaded_ref} as the
# pubmlst reference.
# Blank lines are skipped, and a final line without a trailing newline is still
# processed rather than silently dropped.
# Globals:   scratch_dir (read), downloaded_ref (read)
# Arguments: none
# Outputs:   one shell command line per sample id, on stdout
#######################################
emit_pubmlst_run_cmds() {
  local IFS=$' \t\n'
  local ariba_image='/data/ucct/bi/pipelines/singularity-images/ariba:2.14.6--py39heaaa4ec_6'
  local sample job reads
  local -a cmd

  while read -r sample || [[ -n "$sample" ]]; do
    [[ -n "$sample" ]] || continue

    job="ARIBA_${sample}_pubmlst"
    # Common prefix of the R1/R2 filtered FASTQ files of this sample.
    reads="${scratch_dir}/../../01-preprocessing/${sample}/${sample}"

    cmd=(
      srun
      --chdir "$scratch_dir"
      --output "logs/${job}.%j.log"
      --job-name "$job"
      --cpus-per-task 5
      --mem 5G
      --partition short_idx
      --time 02:00:00
      singularity exec
      -B "${scratch_dir}/../../../../"
      "$ariba_image"
      ariba run
      "${scratch_dir}/${downloaded_ref}"
      "${reads}_R1_filtered.fastq.gz"
      "${reads}_R2_filtered.fastq.gz"
      "${scratch_dir}/${sample}/out_pubmlst_${sample}_run"
    )

    # The words are joined with single spaces; the trailing '&' makes the
    # generated script launch every job in the background.
    printf '%s\n' "mkdir -p ${sample}; ${cmd[*]} &"
  done
}

# NOTE(review): sample ids are read from ../../../samples_id.txt here, while the
# sample/database product is built from ../../samples_id.txt - confirm that both
# paths resolve to the same list.
# The commands are appended to _01_ariba.sh.
emit_pubmlst_run_cmds < ../../../samples_id.txt >> _01_ariba.sh

#######################################
# Print one `mv` command per "sample database" pair read from stdin. Each
# command renames <sample>/out_<database>_<sample>_run/report.tsv to
# <sample>_<database>_report.tsv inside the same run directory.
# Lines that lack either column are skipped; fields are read with `read -r`,
# so they are neither glob-expanded nor backslash-processed.
# Globals:   none
# Arguments: none
# Outputs:   one shell command line per valid input line, on stdout
#######################################
emit_report_renames() {
  local sample database _ run_dir

  while read -r sample database _; do
    [[ -n "$sample" && -n "$database" ]] || continue
    run_dir="${sample}/out_${database}_${sample}_run"
    printf '%s\n' "mv ${run_dir}/report.tsv ${run_dir}/${sample}_${database}_report.tsv"
  done
}

# Every pair in sample_database.txt is used here (no pubmlst filtering);
# this (re)creates _02_fix_tsvreport.sh.
emit_report_renames < sample_database.txt > _02_fix_tsvreport.sh
