# module load singularity

# Lablog to apply variant filters to combined GVCFs generated by SAREK (HaplotypeCaller)


# Derive the working directory used by every generated srun command
# (--chdir, the singularity bind path and all input/output paths): each
# occurrence of /data/ucct/bi/scratch_tmp in the current directory is
# rewritten to /scratch.
# "$PWD" is quoted and emitted with printf so that a path containing runs of
# whitespace, glob characters or a leading dash reaches sed unmodified.
scratch_dir=$(printf '%s\n' "$PWD" | sed 's#/data/ucct/bi/scratch_tmp#/scratch#g')

# Directory that receives the srun log files named by the --output options.
mkdir -p logs

# Step 01 - write _01_separate_snps_indels.sh.
# The generated file holds two single-line, backgrounded srun commands that run
# GATK SelectVariants on the joint_germline.vcf.gz found under 01-sarek: the
# first keeps SNPs (snps.vcf.gz), the second keeps INDELs (indels.vcf.gz).
# printf repeats its format for every argument, so each group below is written
# with the separator given in the format string.
{
    # SNP selection
    printf '%s ' \
        "srun --partition short_idx --time 2:00:00 --chdir ${scratch_dir}" \
        '--output logs/SELECTSNPS.log --job-name SELECTSNPS' \
        "singularity exec -B ${scratch_dir}/../../../" \
        '/data/ucct/bi/pipelines/singularity-images/gatk4-spark:4.2.0.0--hdfd78af_1' \
        'gatk SelectVariants'
    printf '     %s ' \
        "-V ${scratch_dir}/../01-sarek/variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz" \
        '-select-type SNP'
    printf '     %s\n' "-O ${scratch_dir}/snps.vcf.gz &"

    # INDEL selection
    printf '%s ' \
        "srun --partition short_idx --time 2:00:00 --chdir ${scratch_dir}" \
        '--output logs/SELECTINDELS.log --job-name SELECTINDELS' \
        "singularity exec -B ${scratch_dir}/../../../" \
        '/data/ucct/bi/pipelines/singularity-images/gatk4-spark:4.2.0.0--hdfd78af_1' \
        'gatk SelectVariants'
    printf '    %s ' \
        "-V ${scratch_dir}/../01-sarek/variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz" \
        '-select-type INDEL'
    printf '    %s\n' "-O ${scratch_dir}/indels.vcf.gz &"
} > _01_separate_snps_indels.sh

# Step 02 - write _02_filter.sh.
# The generated file holds two single-line, backgrounded srun commands that run
# GATK VariantFiltration: one on snps.vcf.gz and one on indels.vcf.gz, each
# with its own set of -filter / --filter-name pairs. The single quotes around
# the filter expressions are kept literally so they reach the generated script.
# Every option group is preceded by a tab, as set by the printf format.
{
    # SNP filters -> snps_filtered.vcf.gz
    printf '%s ' \
        "srun --partition short_idx --time 2:00:00 --chdir ${scratch_dir}" \
        '--output logs/FILSNP.log --job-name FILSNP' \
        "singularity exec -B ${scratch_dir}/../../../" \
        '/data/ucct/bi/pipelines/singularity-images/gatk4-spark:4.2.0.0--hdfd78af_1' \
        'gatk VariantFiltration'
    printf '\t%s ' \
        "-V ${scratch_dir}/snps.vcf.gz" \
        "-filter 'QD < 2.0' --filter-name 'QD2'" \
        "-filter 'QUAL < 30.0' --filter-name 'QUAL30'" \
        "-filter 'SOR > 3.0' --filter-name 'SOR3'" \
        "-filter 'FS > 60.0' --filter-name 'FS60'" \
        "-filter 'MQ < 40.0' --filter-name 'MQ40'" \
        "-filter 'MQRankSum < -12.5' --filter-name 'MQRankSum-12.5'" \
        "-filter 'ReadPosRankSum < -8.0' --filter-name 'ReadPosRankSum-8'"
    printf '\t%s\n' "-O ${scratch_dir}/snps_filtered.vcf.gz &"

    # INDEL filters -> indels_filtered.vcf.gz
    printf '%s ' \
        "srun --partition short_idx --time 2:00:00 --chdir ${scratch_dir}" \
        '--output logs/FILINDEL.log --job-name FILINDEL' \
        "singularity exec -B ${scratch_dir}/../../../" \
        '/data/ucct/bi/pipelines/singularity-images/gatk4-spark:4.2.0.0--hdfd78af_1' \
        'gatk VariantFiltration'
    printf '\t%s ' \
        "-V ${scratch_dir}/indels.vcf.gz" \
        "-filter 'QD < 2.0' --filter-name 'QD2'" \
        "-filter 'QUAL < 30.0' --filter-name 'QUAL30'" \
        "-filter 'FS > 200.0' --filter-name 'FS200'" \
        "-filter 'ReadPosRankSum < -20.0' --filter-name 'ReadPosRankSum-20'"
    printf '\t%s\n' "-O ${scratch_dir}/indels_filtered.vcf.gz &"
} > _02_filter.sh

# Step 03 - write _03_merge_vcfs.sh.
# The generated file holds one single-line, backgrounded srun command that runs
# GATK MergeVcfs on snps_filtered.vcf.gz and indels_filtered.vcf.gz and writes
# variants_fil.vcf.gz. Every option group is preceded by a tab, as set by the
# printf format.
{
    printf '%s ' \
        "srun --partition short_idx --time 2:00:00 --chdir ${scratch_dir}" \
        '--output logs/MERGEVCF.log --job-name MERGEVCF' \
        "singularity exec -B ${scratch_dir}/../../../" \
        '/data/ucct/bi/pipelines/singularity-images/gatk4-spark:4.2.0.0--hdfd78af_1' \
        'gatk MergeVcfs'
    printf '\t%s ' \
        "-I ${scratch_dir}/snps_filtered.vcf.gz" \
        "-I ${scratch_dir}/indels_filtered.vcf.gz"
    printf '\t%s\n' "-O ${scratch_dir}/variants_fil.vcf.gz &"
} > _03_merge_vcfs.sh

# Step 04 - write _04_gzip.sh.
# The generated file holds one backgrounded srun command that decompresses
# variants_fil.vcf.gz with gzip -d; the file name is relative, so it is
# resolved against the directory passed to --chdir.
printf '%s %s %s\n' \
    "srun --partition short_idx --time 2:00:00 --chdir ${scratch_dir}" \
    '--output logs/GZIP.log --job-name GZIP' \
    'gzip -d variants_fil.vcf.gz &' > _04_gzip.sh
