# module load singularity

# srun parameters
# Current working directory with every "/data/ucct/bi/scratch_tmp/" occurrence
# rewritten to "/scratch/". pwd is piped straight into sed (instead of going
# through an unquoted `echo $(pwd)`) so the path is never word-split or
# glob-expanded.
scratch_dir=$(pwd | sed "s@/data/ucct/bi/scratch_tmp/@/scratch/@g")
# First four characters that follow "ANALYSIS/" in the working directory path;
# empty when the path does not contain "ANALYSIS/".
# NOTE(review): presumably the analysis folder name starts with a YYYY date - confirm.
analysis_year=$(pwd | awk -F'ANALYSIS/' '{print substr($2, 1, 4)}')

# Directory for the job logs; -p keeps a re-run of this lablog from erroring
# when the directory already exists.
mkdir -p logs

# _00_prepareRaw.sh: for every ID listed in ../samples_id.txt, create a
# per-sample directory and symlink the filtered R1 reads from
# ../../01-preprocessing into it under the <id>_lib1_R1.fastq.gz name ...
cat ../samples_id.txt | xargs -I % echo "mkdir %; ln -s ../../01-preprocessing/%/%_R1_filtered.fastq.gz %/%_lib1_R1.fastq.gz" > _00_prepareRaw.sh
# ... then append the matching R2 symlink commands (all R1 lines come first).
cat ../samples_id.txt | xargs -I % echo "ln -s ../../01-preprocessing/%/%_R2_filtered.fastq.gz %/%_lib1_R2.fastq.gz" >> _00_prepareRaw.sh

# _01_preparesamples.sh: inside each sample directory, write samples.txt with
# the unique first two "_"-separated fields of every *.fastq.gz file name,
# tab-separated. The '\t' is emitted literally so that tr interprets it when
# the generated script runs.
cat ../samples_id.txt | xargs -I % echo "cd %;ls *.fastq.gz | tr '_' '\t' | cut -f 1,2 | sort -u > samples.txt;cd -" > _01_preparesamples.sh

# _02_mtbseq.sh: one backgrounded srun line per ID in ../samples_id.txt.
# "@@" is the xargs placeholder that gets replaced by the sample ID; "%j" is
# left untouched in the emitted text for srun to handle.
mtbseq_srun_opts="--job-name MTBSEQ.@@  --output logs/MTBSEQ.@@.%j.log --partition middle_idx --mem 48G --chdir ${scratch_dir}/@@ --cpus-per-task 10"
mtbseq_container="singularity exec -B ${scratch_dir}/../../../ /data/ucct/bi/pipelines/singularity-images/mtbseq:1.1.0--hdfd78af_0"
mtbseq_call="MTBseq --step TBfull --threads 10 --samples samples.txt"
cat ../samples_id.txt \
  | xargs -I @@ echo -e "srun ${mtbseq_srun_opts} ${mtbseq_container} ${mtbseq_call} &" \
  > _02_mtbseq.sh

# _03_gather_results.sh collects the per-sample outputs into three folders.
#
# Expansion timing matters in the echo strings below:
#   * \$(...) and \${FIRST_SAMPLE} are escaped, so they are written literally
#     and resolved when the generated script runs.
#   * ${analysis_year} is expanded NOW, while generating the script, because
#     the generated script never defines that variable. Leaving it escaped
#     made the filter collapse to grep "^'" at run time. If analysis_year is
#     empty here, the pattern is likewise just "^'".

# classification
# Header line comes from the first sample's table; then every row that starts
# with '<analysis_year> is appended from all samples. cut drops the
# "file name:" prefix grep prints when it searches several files, and sed
# removes the single quotes.
echo "mkdir classification_all" > _03_gather_results.sh
echo "FIRST_SAMPLE=\$(head -n1 ../samples_id.txt); head -n 1 \${FIRST_SAMPLE}/Classification/Strain_Classification.tab > classification_all/strain_classification_all.tab; grep \"^'${analysis_year}\" */Classification/Strain_Classification.tab | cut -d \":\" -f 2 | sed \"s/'//g\" >> classification_all/strain_classification_all.tab" >> _03_gather_results.sh
# resistances
# One copy command per sample: the amended joint table is copied to
# resistances_all/<id>_var_res.tab.
echo "mkdir resistances_all" >> _03_gather_results.sh
cat ../samples_id.txt | xargs -I % echo "cp %/Amend/NONE_joint_cf4_cr4_fr75_ph4_samples1_amended.tab resistances_all/%_var_res.tab" >> _03_gather_results.sh
# stats
# Same header + filtered-rows scheme as the classification table, applied to
# the mapping/variant statistics.
echo "mkdir stats_all; FIRST_SAMPLE=\$(head -n1 ../samples_id.txt); head -n 1 \${FIRST_SAMPLE}/Statistics/Mapping_and_Variant_Statistics.tab > stats_all/statistics_all.tab; grep \"^'${analysis_year}\" */Statistics/Mapping_and_Variant_Statistics.tab | cut -d \":\" -f 2 | sed \"s/'//g\" >> stats_all/statistics_all.tab" >> _03_gather_results.sh
