# module load singularity

scratch_dir=$(echo $PWD | sed 's/\/data\/ucct\/bi\/scratch_tmp/\/scratch/g')

mkdir logs

# BLOCK 1: CREATION OF INPUT.TAB AND COMMANDS.OUT

## When creating input.tab, run the following line if you want to use .fastq.gz files for your analysis. If not, comment it.
cat ../samples_id.txt | while read in; do echo -e "${in}\t${scratch_dir}/../02-preprocessing/${in}/${in}_R1_filtered.fastq.gz\t${scratch_dir}/../02-preprocessing/${in}/${in}_R2_filtered.fastq.gz"; done >> input.tab

## If you are going to include .fasta files in the analysis, run the following line. Run the previous line as well if you also have .fastq.gz files for your analysis.
## Bear in mind this line considers the .fasta files are inside a certain folder from REFERENCES! Change this line accordingly.
## ls ../../../REFERENCES/*/*.fasta | cut -d '/' -f6 | sed 's/.fasta//g' | while read in; do paste <(echo ${in}) <(echo ${scratch_dir}/../../../REFERENCES/*/${in}.fasta); done >> input.tab

singularity exec -B ${scratch_dir}/../../../ /data/ucct/bi/pipelines/singularity-images/snippy:4.6.0--hdfd78af_4 snippy-multi ${scratch_dir}/input.tab --mincov 9 --mapqual 10 --basequal 5 --minqual 30 --ref ${scratch_dir}/../../../REFERENCES/%% --cpus 5 > commands.out

# BLOCK 2: CREATION OF _00_snippy.sh

head -n -1 commands.out | sed -e "s@^@srun --chdir ${scratch_dir} --output logs/SNIPPY.%j.log --job-name SNIPPY --cpus-per-task 5 --mem 49152 --partition short_idx --time 02:00:00 env - PATH="$PATH" singularity exec -B ${scratch_dir}/../../../ /data/ucct/bi/pipelines/singularity-images/snippy:4.6.0--hdfd78af_4 @" | awk '{print $0" &"}' > _00_snippy.sh

# BLOCK 3: CREATION OF _01_snippy_core.sh

## A) BY DEFAULT: snippy-core will run without masking any positions.
tail -n 1 commands.out | sed -e "s@^@srun --chdir ${scratch_dir} --output logs/SNIPPY_CORE.%j.log --job-name SNIPPY --cpus-per-task 5 --mem 49152 --partition short_idx --time 02:00:00 env - PATH="$PATH" singularity exec -B ${scratch_dir}/../../../ /data/ucct/bi/pipelines/singularity-images/snippy:4.6.0--hdfd78af_4 @" | awk '{print $0" &"}' > _01_snippy_core.sh

## B) If you want to mask complex variants, uncomment these lines:
## echo "grep \"complex\" ./*/snps.vcf | cut -f 1,2,4,5 | cut -d \":\" -f 2 | sort -u | awk '{pos1=\$2; len_ref=length(\$3); printf \"%s\t%s\t%s\n\", \$1, pos1-1, pos1+len_ref+1}' | grep -v \"^#\" > mask_complex_variants.bed" > _01_snippy_core.sh
## ls ${scratch_dir}/../../../REFERENCES | xargs -I %% echo "snippy-core --debug --mask ./mask_complex_variants.bed --mask-char 'N' --ref '../../../REFERENCES/%%' $(cat ../samples_id.txt | xargs)" >> _01_snippy_core.sh

## C) If you want to mask low-coverage variants, uncomment these lines:
## echo "awk -F'\t' '/^NZ_/ {split($9, format, ":"); split($10, values, ":"); for (i in format) if (format[i] == "DP" && values[i] < 10) print $1, $2 - 1, $2}' OFS='\t' ./*/snps.vcf > mask_low_coverage_variants.bed" > _01_snippy_core.sh
## ls ${scratch_dir}/../../../REFERENCES | xargs -I %% echo "snippy-core --debug --mask ./mask_low_coverage_variants.bed --mask-char 'N' --ref '../../../REFERENCES/%%' $(cat ../samples_id.txt | xargs)" >> _01_snippy_core.sh

# BLOCK 4: CREATION OF _02_phylo_aln.sh

echo "srun --chdir ${scratch_dir} --output logs/SNIP-SITES.%j.log --job-name SNIP-SITES --cpus-per-task 5 --mem 49152 --partition short_idx --time 02:00:00 env - PATH="$PATH" singularity exec -B ${scratch_dir}/../../../ /data/ucct/bi/pipelines/singularity-images/snippy:4.6.0--hdfd78af_4 snp-sites -b -c -o phylo.aln core.full.aln &" > _02_phylo_aln.sh

## Run this line if you want to know the size of phylo.aln
## awk 'BEGIN{FS="[> ]"} /^>/{val=$2;next}  {print val,length($0)}' phylo.aln

## Run this line to compare samples in pairs (CHANGE THIS LINE ACCORDING TO YOUR NEEDS, THIS IS JUST AN EXAMPLE!)
#€ awk '$4 != $5 || $4 != $6 || $5 != $6' core.tab > differences.txt

# BLOCK 5: CREATION OF _03_gubbins.sh to run GUBBINS (in order to filter recombinant sites):

echo "env - PATH="$PATH" singularity exec -B ${scratch_dir}/../../../ /data/ucct/bi/pipelines/singularity-images/snippy:4.6.0--hdfd78af_4 snippy-clean_full_aln core.full.aln > clean.full.aln" > _03_gubbins.sh
echo "singularity exec -B ${scratch_dir}/../../../ /data/ucct/bi/pipelines/singularity-images/gubbins:3.3.5--py39pl5321he4a0461_0 run_gubbins.py --threads 20 -p gubbins clean.full.aln" >> _03_gubbins.sh
echo "env - PATH="$PATH" singularity exec -B ${scratch_dir}/../../../ /data/ucct/bi/pipelines/singularity-images/snippy:4.6.0--hdfd78af_4 snp-sites -c gubbins.filtered_polymorphic_sites.fasta > clean.core.aln" >> _03_gubbins.sh

## Run gubbins
echo "srun --chdir ${scratch_dir} --output logs/GUBBINS.%j.log --job-name GUBBINS --cpus-per-task 20 --mem 49152 --partition short_idx --time 02:00:00 bash _03_gubbins.sh &" > _03_run_gubbins.sh
