# module load Nextflow singularity

# Link the reads directory and the sample-ID list from the parent directory
# into the current one. An entry that already exists here (including a
# dangling symlink) is left untouched, so re-running this script does not
# fail with "File exists" and never replaces a real file.
for link_target in ../00-reads/ ../samples_id.txt; do
  link_name=$(basename "$link_target")
  if [ ! -e "$link_name" ] && [ ! -L "$link_name" ]; then
    ln -s "$link_target" .
  fi
done

#######################################
# Write the CSV samplesheet passed to the pipeline via --input.
# The header row is always (re)written; then one row is emitted per
# non-empty line of the sample-ID file, using the ID for both the patient
# and sample columns, fixed values XY / 0 / 1 for sex / status / lane, and
# FASTQ paths 00-reads/<id>_R1.fastq.gz and 00-reads/<id>_R2.fastq.gz.
# Sample IDs are copied verbatim: a read loop with printf is used instead of
# `xargs -I % echo -e`, which would parse quotes and backslash escapes in
# the IDs and fork one process per sample.
# Arguments: $1 - file with one sample ID per line
#            $2 - samplesheet CSV to create (overwritten)
# Returns:   non-zero if the sample-ID file cannot be read
#######################################
write_samplesheet() {
  local ids_file=$1 csv_file=$2 sample_id
  printf '%s\n' "patient,sex,status,sample,lane,fastq_1,fastq_2" > "$csv_file"
  # `|| [ -n ... ]` keeps a final line that lacks a trailing newline.
  while read -r sample_id || [ -n "$sample_id" ]; do
    [ -n "$sample_id" ] || continue   # skip blank lines, as xargs did
    printf '%s,XY,0,%s,1,00-reads/%s_R1.fastq.gz,00-reads/%s_R2.fastq.gz\n' \
      "$sample_id" "$sample_id" "$sample_id" "$sample_id"
  done < "$ids_file" >> "$csv_file"
}

write_samplesheet samples_id.txt samplesheet.csv

#######################################
# Append one tab-separated row per sample to a pedigree (.ped) file.
# Each row is: <family_id> <sample>_<sample> 0_0 0_0 1 2, where family_id
# is the sample ID with its last character removed.
# NOTE(review): the columns look like PED family / individual / father /
# mother / sex / phenotype - confirm against the consumer of family.ped.
# Rows are appended, so running this more than once duplicates them.
# Blank lines are skipped and a final line without a trailing newline is
# still processed, so the rows match those written to samplesheet.csv.
# Arguments: $1 - file with one sample ID per line
#            $2 - .ped file to append to
# Returns:   non-zero if either file cannot be opened
#######################################
append_ped_rows() {
  local ids_file=$1 ped_file=$2 sample family_id
  while read -r sample || [ -n "$sample" ]; do
    [ -n "$sample" ] || continue
    family_id=${sample%?}
    printf '%s\t%s_%s\t0_0\t0_0\t1\t2\n' "$family_id" "$sample" "$sample"
  done < "$ids_file" >> "$ped_file"
}

append_ped_rows ./samples_id.txt ../../DOC/family.ped

# Create the three numbered working directories next to the pipeline output;
# -p makes this a no-op for any that already exist.
for stage_dir in 02-postprocessing 03-annotation 99-stats; do
  mkdir -p "$stage_dir"
done

# Path of the current directory with every occurrence of
# /data/ucct/bi/scratch_tmp replaced by /scratch. The result is written into
# the generated Slurm files as their --chdir value.
# Pattern substitution is used instead of `echo $PWD | sed`, so the path is
# never word-split or glob-expanded and no extra processes are spawned. The
# quoted pattern is matched literally.
scratch_tmp_prefix=/data/ucct/bi/scratch_tmp
scratch_prefix=/scratch
scratch_dir=${PWD//"$scratch_tmp_prefix"/$scratch_prefix}

# Generate sarek.sbatch, the Slurm batch script that launches the Sarek
# Nextflow pipeline (overwrites any existing file of that name).
# The heredoc delimiter is unquoted, so expansions happen NOW, while this
# script runs, not when the job is submitted:
#   - $(date '+%Y%m%d') fixes the log-file name to the generation date;
#   - $scratch_dir is written literally as the job's --chdir value;
#   - each "\\" is emitted as a single "\" (line continuation in the output).
# The relative paths inside (samplesheet.csv, ../../DOC, ../../REFERENCES,
# 01-sarek) appear after the --chdir directive in the generated script.
cat <<EOF > sarek.sbatch
#!/bin/sh
#SBATCH --ntasks 1
#SBATCH --cpus-per-task 2
#SBATCH --mem 4G
#SBATCH --time 2-00:00:0
#SBATCH --partition middle_idx
#SBATCH --output $(date '+%Y%m%d')_sarek01.log
#SBATCH --chdir $scratch_dir

export NXF_OPTS="-Xms500M -Xmx4G"

nextflow run /data/ucct/bi/pipelines/nf-core-sarek/nf-core-sarek_3.4.4/3_4_4/main.nf \\
	-c ../../DOC/hpc_slurm_sarek.config \\
	--input 'samplesheet.csv' \\
	--outdir 01-sarek \\
	--genome GATK.GRCh37 \\
	--wes \\
	--intervals '../../REFERENCES/Illumine_Exome_CEX_TargetedRegions_v1.2_modb37_mod.bed' \\
	--dbsnp '/data/ucct/bi/references/eukaria/homo_sapiens/hg19/1000genomes_b37/variants/20131208/dbsnp_138_mod.b37.vcf.gz' \\
	--dbsnp_tbi '/data/ucct/bi/references/eukaria/homo_sapiens/hg19/1000genomes_b37/variants/20131208/dbsnp_138_mod.b37.vcf.gz.tbi' \\
	--igenomes_ignore \\
	--igenomes_base '/data/ucct/bi/references/igenomes/' \\
	--tools 'haplotypecaller' \\
	--joint_germline \\
	-resume
EOF

# Helper script 1: submits the generated batch job.
cat > _01_run_sarek.sh <<EOF
sbatch sarek.sbatch
EOF

# Helper script 2: removes the Nextflow work directory and 01-sarek/gatk4/
# through two backgrounded srun calls. $scratch_dir is expanded now, so the
# resolved path is what ends up in the file.
cat > _02_clean.sh <<EOF
srun --partition short_obx --chdir $scratch_dir rm -rf work &
srun --partition short_obx --chdir $scratch_dir rm -rf 01-sarek/gatk4/ &
EOF
