# module load BLAST+/2.11.0-gompi-2020b

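# header (columns of <sample>/<sample>_blast.tab as written by blastn with
# -outfmt '6 qseqid stitle std slen qlen qcovs', followed by the two columns
# appended by _02_filter_blast.sh; the filtered tables additionally get the
# sample name prepended as a new first column):
#  1: qseqid
#  2: stitle
#  3: qaccver
#  4: saccver
#  5: pident
#  6: length
#  7: mismatch
#  8: gapopen
#  9: qstart
# 10: qend
# 11: sstart
# 12: send
# 13: evalue
# 14: bitscore
# 15: slen
# 16: qlen
# 17: qcovs
# 18: %cgAligned
# 19: %refCovered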
# MORE INFO: https://www.metagenomics.wiki/tools/blast/blastn-output-format-6

# Prepare the BLASTN run for every sample listed (one ID per line) in
# ../samples_id.txt:
#   * logs/                      directory that receives the srun log files
#   * <sample>/<sample>.fasta    symlink to the sample's unicycler assembly
#   * _01_blast.sh               one backgrounded srun/blastn command per sample
# Nothing is submitted from here; _01_blast.sh only gets written.

# srun is pointed (--chdir) at the current path with every occurrence of
# /data/bi/scratch_tmp replaced by /scratch.
data_root=/data/bi/scratch_tmp
scratch_root=/scratch
scratch_dir=${PWD//"${data_root}"/${scratch_root}}

# -p keeps a re-run of this script from failing on an already existing logs/.
mkdir -p logs

# The output redirection comes first so that _01_blast.sh is (re)created even
# when ../samples_id.txt cannot be opened.
while IFS= read -r sample_id || [[ -n "${sample_id}" ]]; do
  [[ -n "${sample_id}" ]] || continue

  # Link the assembly from inside the sample directory so that the relative
  # target (and its *ASSEMBLY glob) resolves from there. The subshell keeps the
  # cd local, and the link is only attempted once the directory is known to
  # exist, so a failed mkdir/cd can no longer drop a stray link in the run
  # directory.
  if mkdir -p -- "${sample_id}"; then
    (
      cd -- "${sample_id}" || exit 1
      ln -s -- ../../*ASSEMBLY/03-assembly/unicycler/"${sample_id}".fasta .
    )
  fi

  # The words are joined with single spaces ("${blast_cmd[*]}", default IFS)
  # to form the command line; -outfmt keeps its literal single quotes and the
  # trailing "&" backgrounds each srun when _01_blast.sh is executed.
  blast_cmd=(
    srun --chdir "${scratch_dir}" --partition middle_idx --mem 376530M
    --time 48:00:00 --cpus-per-task 10
    --output "logs/BLASTN_${sample_id}_%j.log" --job-name "BLASTN_${sample_id}"
    blastn -num_threads 10 -db /data/bi/references/BLAST_dbs/nt_20211025/nt
    -query "${sample_id}/${sample_id}.fasta"
    -out "${sample_id}/${sample_id}_blast.tab"
    -outfmt "'6 qseqid stitle std slen qlen qcovs'" "&"
  )
  printf '%s\n' "${blast_cmd[*]}"
done > _01_blast.sh < ../samples_id.txt
# Write _02_filter_blast.sh: one filtering pipeline per sample listed in
# ../samples_id.txt, turning <sample>/<sample>_blast.tab into
# <sample>/<sample>_blast_filt.tab.
#
# blastn is run with -outfmt '6 qseqid stitle std slen qlen qcovs', so in the
# raw table: $2 = stitle, $6 = length, $15 = slen, $16 = qlen.
#   1st awk: appends $18 = length/qlen (%cgAligned) and $19 = length/slen
#            (%refCovered).
#   2nd awk: keeps rows with qlen > 200, %cgAligned > 0.7 and a subject title
#            that does not match /phage/, and prepends the sample name.
#
# The awk programs are single-quoted on purpose: their $N field references
# must reach the generated script unexpanded.
ratios_prog='BEGIN{OFS="\t";FS="\t"}{print $0,$6/$16,$6/$15}'
filter_prog='BEGIN{OFS="\t";FS="\t"} $16 > 200 && $18 > 0.7 && $2 !~ /phage/ {print samplename,$0}'

# The output redirection comes first so that _02_filter_blast.sh is
# (re)created even when ../samples_id.txt cannot be opened.
while IFS= read -r sample_id || [[ -n "${sample_id}" ]]; do
  [[ -n "${sample_id}" ]] || continue
  printf "awk '%s' %s | awk -v \"samplename=%s\" '%s' > %s\n" \
    "${ratios_prog}" "${sample_id}/${sample_id}_blast.tab" \
    "${sample_id}" "${filter_prog}" "${sample_id}/${sample_id}_blast_filt.tab"
done > _02_filter_blast.sh < ../samples_id.txt
# Write _03_gather_results.sh, the script that merges all per-sample filtered
# tables into all_samples_filtered_BLAST_results.tab below a single header row.
# When executed, the generated script writes the header row to a temporary
# file called "header", concatenates it with every */*blast_filt.tab and then
# deletes "header" again.
results_columns=(
  samplename contigname stitle qaccver saccver pident length mismatch gapopen
  qstart qend sstart send evalue bitscore slen qlen qcovs %cgAligned %refCovered
)

# Join the names with literal tab characters; the IFS change stays inside the
# command substitution's subshell.
header_row=$(IFS=$'\t'; printf '%s' "${results_columns[*]}")

{
  printf 'echo "%s" > header\n' "${header_row}"
  printf '%s\n' 'cat header */*blast_filt.tab > all_samples_filtered_BLAST_results.tab'
  printf '%s\n' 'rm header'
} > _03_gather_results.sh
