# module load Nextflow singularity

# Make the reads directory and the sample list from the parent directory
# available in the current working directory through symbolic links.
for link_target in ../00-reads/ ../samples_id.txt; do
  ln -s "$link_target" .
done

# Create the PED file.
#
# build_family_ped <samples_file> <ped_file>
#
# Appends one tab-separated PED row per sample to <ped_file>. Each sample ID
# is read as <family_id><role>, where the last character is the role:
#   1 -> father (row: parents 0/0, sex 1, phenotype 1)
#   2 -> mother (row: parents 0/0, sex 2, phenotype 1)
#   3 -> child  (row: father/mother IDs, sex 1, phenotype 2)
# Samples ending in any other character produce no row. Individual IDs are
# written as <sample>_<sample>.
# NOTE(review): presumably this matches the sample naming used in the
# joint-called VCF; confirm against the pipeline output.
#
# A parent is only attached to a child when the most recently seen
# father/mother belongs to the same family; otherwise "0" (unknown parent)
# is written instead of leaking the previous family's parent or emitting
# a bare "_".
#
# Blank lines are skipped and a final line without a trailing newline is
# still processed. Rows are appended, so rerunning adds duplicate rows.
#
# Returns: 0 on success, 1 if <samples_file> cannot be read.
build_family_ped() {
  local samples_file=$1
  local ped_file=$2
  local sample family_id individual
  local father=0 father_family=''
  local mother=0 mother_family=''
  local child_father child_mother

  if [[ ! -r $samples_file ]]; then
    printf 'ERROR: cannot read sample list: %s\n' "$samples_file" >&2
    return 1
  fi

  # "|| [[ -n $sample ]]" keeps the last line when the file lacks a final newline.
  while read -r sample || [[ -n $sample ]]; do
    [[ -n $sample ]] || continue

    family_id=${sample%?}
    individual="${sample}_${sample}"

    case ${sample: -1} in
      1)
        father=$individual
        father_family=$family_id
        printf '%s\t%s\t0\t0\t1\t1\n' "$family_id" "$individual" >> "$ped_file"
        ;;
      2)
        mother=$individual
        mother_family=$family_id
        printf '%s\t%s\t0\t0\t2\t1\n' "$family_id" "$individual" >> "$ped_file"
        ;;
      3)
        # Only trust a remembered parent if it belongs to this child's family.
        child_father=0
        child_mother=0
        if [[ $father_family == "$family_id" ]]; then
          child_father=$father
        fi
        if [[ $mother_family == "$family_id" ]]; then
          child_mother=$mother
        fi
        printf '%s\t%s\t%s\t%s\t1\t2\n' \
          "$family_id" "$individual" "$child_father" "$child_mother" >> "$ped_file"
        ;;
    esac
  done < "$samples_file"
}

build_family_ped ./samples_id.txt ../../DOC/family.ped

# write_samplesheet <samples_file> <sheet_csv>
#
# Writes the sarek input samplesheet: a CSV header followed by one row per
# sample ID found in <samples_file>. Every row uses the same fixed values
# (sex "XY", status 0, lane 1) and points at the paired FASTQ files
# 00-reads/<sample>_R1.fastq.gz and 00-reads/<sample>_R2.fastq.gz.
# NOTE(review): sex is hard-coded to XY for every sample, including samples
# recorded as mothers in the PED file; confirm this is intended.
#
# <sheet_csv> is overwritten. Blank lines in <samples_file> are skipped.
# Sample IDs are copied verbatim: quotes and backslashes are not interpreted
# (the previous xargs/echo -e pipeline mangled them or aborted).
#
# Returns: 0 on success, 1 if <samples_file> cannot be read (the header is
# still written in that case).
write_samplesheet() {
  local samples_file=$1
  local sheet_csv=$2
  local sample

  printf '%s\n' 'patient,sex,status,sample,lane,fastq_1,fastq_2' > "$sheet_csv"

  if [[ ! -r $samples_file ]]; then
    printf 'ERROR: cannot read sample list: %s\n' "$samples_file" >&2
    return 1
  fi

  # "|| [[ -n $sample ]]" keeps the last line when the file lacks a final newline.
  while read -r sample || [[ -n $sample ]]; do
    [[ -n $sample ]] || continue
    printf '%s,XY,0,%s,1,00-reads/%s_R1.fastq.gz,00-reads/%s_R2.fastq.gz\n' \
      "$sample" "$sample" "$sample" "$sample"
  done < "$samples_file" >> "$sheet_csv"
}

write_samplesheet samples_id.txt samplesheet.csv

# Output directories for the steps that follow the sarek run.
mkdir -p 02-postprocessing 03-annotation 99-stats

# to_scratch_path <path>
#
# Prints <path> with every occurrence of the literal string
# /data/ucct/bi/scratch_tmp replaced by /scratch. The path is handled as a
# single quoted string, so whitespace and glob characters in it are preserved
# (the previous unquoted `echo $PWD | sed` collapsed/expanded them).
# NOTE(review): presumably /scratch is how the compute nodes see the same
# directory; confirm against the cluster mount layout.
to_scratch_path() {
  local shared_prefix=/data/ucct/bi/scratch_tmp
  # Quoting the pattern makes it a literal match rather than a glob.
  local converted=${1//"$shared_prefix"//scratch}
  printf '%s\n' "$converted"
}

# Working directory as used in the generated SLURM commands (--chdir).
scratch_dir=$(to_scratch_path "$PWD")

# Generate the SLURM batch script that launches nf-core/sarek.
# The here-document delimiter is unquoted, so expansion happens now, while
# this file is being generated, not when the job runs:
#   - ${log_stamp} (today's date, YYYYMMDD) is baked into the log file name;
#   - $scratch_dir is baked into the --chdir directive;
#   - each "\\" is written out as a single "\" line continuation.
log_stamp=$(date '+%Y%m%d')

cat > sarek.sbatch <<EOF
#!/bin/sh
#SBATCH --ntasks 1
#SBATCH --cpus-per-task 2
#SBATCH --mem 4G
#SBATCH --time 2-00:00:0
#SBATCH --partition middle_idx
#SBATCH --output ${log_stamp}_sarek01.log
#SBATCH --chdir $scratch_dir

export NXF_OPTS="-Xms500M -Xmx4G"

nextflow run /data/ucct/bi/pipelines/nf-core-sarek/nf-core-sarek_3.4.4/3_4_4/main.nf \\
          -c ../../DOC/hpc_slurm_sarek.config \\
          --input 'samplesheet.csv' \\
          --outdir 01-sarek \\
          --genome GATK.GRCh37 \\
	  	  --wes \\
          --intervals '../../REFERENCES/Illumine_Exome_CEX_TargetedRegions_v1.2_modb37_mod.bed' \\
          --dbsnp '/data/ucct/bi/references/eukaria/homo_sapiens/hg19/1000genomes_b37/variants/20131208/dbsnp_138_mod.b37.vcf.gz' \\
	      --dbsnp_tbi '/data/ucct/bi/references/eukaria/homo_sapiens/hg19/1000genomes_b37/variants/20131208/dbsnp_138_mod.b37.vcf.gz.tbi' \\
	      --dbsnp_vqsr '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_138_mod.b37.vcf.gz' \\
	      --tools 'haplotypecaller' \\
          --save_reference \\
          --joint_germline \\
		  --igenomes_ignore \\
		  --igenomes_base '/data/ucct/bi/references/igenomes/' \\
	  	  -resume
EOF

# Launcher: a one-line script that submits the generated batch job.
printf '%s\n' "sbatch sarek.sbatch" > _01_run_sarek.sh

# Cleanup script: one backgrounded srun per path to delete. $scratch_dir is
# expanded now, so the generated commands carry the resolved directory.
{
  for stale_path in work 01-sarek/gatk4/ 01-sarek/preprocessing/markduplicates; do
    printf '%s\n' "srun --partition short_obx --chdir $scratch_dir rm -rf $stale_path &"
  done
} > _02_clean.sh
