ANALYSIS_TYPE=METAGENOMIC
CONFIG_FILE="../../DOC/viralrecon_metagenomic.config"
PARAMS_FILE="../../DOC/viralrecon_metagenomic_params.yml"

cat samples_ref.txt | cut -f3 | sort -u | while read in; do echo ${in^^}; done > host_list.tmp
i=2; cat host_list.tmp | while read in
do
    FOLDER_NAME=$(echo $(date '+%Y%m%d')_ANALYSIS0${i}_${ANALYSIS_TYPE}_${in})
    mkdir ${FOLDER_NAME}
    cp create_summary_report.sh ${FOLDER_NAME}/
    cp deduplicate_long_table.sh ${FOLDER_NAME}/
    cp percentajeNs.py ${FOLDER_NAME}/
    grep -i ${in} samples_ref.txt | cut -f1,2 > ${FOLDER_NAME}/samples_ref.txt
    echo "ln -s ../00-reads ." > ${FOLDER_NAME}/lablog
    printf "ln -s ../samples_id.txt .\n\n" >> ${FOLDER_NAME}/lablog
    echo "# module load Nextflow/22.10.1 singularity" >> ${FOLDER_NAME}/lablog
    echo "" >> ${FOLDER_NAME}/lablog
    printf 'scratch_dir=$(echo $PWD | sed "s/\/data\/bi\/scratch_tmp/\/scratch/g")\n\n' >> ${FOLDER_NAME}/lablog
    cut -f2 ${FOLDER_NAME}/samples_ref.txt | sort -u | while read ref
    do
        echo "sample,fastq_1,fastq_2" > ${FOLDER_NAME}/samplesheet_${ref}.csv
        grep -i ${ref} ${FOLDER_NAME}/samples_ref.txt | while read samples
        do
            arr=($samples); echo "${arr[0]},00-reads/${arr[0]}_R1.fastq.gz,00-reads/${arr[0]}_R2.fastq.gz" >> ${FOLDER_NAME}/samplesheet_${ref}.csv
        done
        REF_FASTA=$(find /data/bi/references/virus/ -name ${ref}.fasta)
        REF_GFF=$(find /data/bi/references/virus/ -name ${ref}.gff)
        echo "cat <<EOF > ${ref}_viralrecon.sbatch" >> ${FOLDER_NAME}/lablog
        echo "#!/bin/sh" >> ${FOLDER_NAME}/lablog
        echo "#SBATCH --ntasks 1" >> ${FOLDER_NAME}/lablog
        echo "#SBATCH --cpus-per-task 2" >> ${FOLDER_NAME}/lablog
        echo "#SBATCH --mem 4G" >> ${FOLDER_NAME}/lablog
        echo "#SBATCH --time 2:00:00" >> ${FOLDER_NAME}/lablog
        echo "#SBATCH --partition middle_idx" >> ${FOLDER_NAME}/lablog
        echo "#SBATCH --output ${ref}_$(date '+%Y%m%d')_viralrecon.log" >> ${FOLDER_NAME}/lablog
        printf "#SBATCH --chdir \$scratch_dir\n\n" >> ${FOLDER_NAME}/lablog
        printf 'export NXF_OPTS="-Xms500M -Xmx4G"\n\n' >> ${FOLDER_NAME}/lablog
        echo "nextflow run /data/bi/pipelines/nf-core-viralrecon-2.6.0/workflow/main.nf \\\\" >> ${FOLDER_NAME}/lablog
        echo "          -c ${CONFIG_FILE} \\\\" >> ${FOLDER_NAME}/lablog
        echo "          -params-file ${PARAMS_FILE} \\\\" >> ${FOLDER_NAME}/lablog
        echo "          --input samplesheet_${ref}.csv \\\\" >> ${FOLDER_NAME}/lablog
        echo "          --outdir ${ref}_$(date '+%Y%m%d')_viralrecon_mapping \\\\" >> ${FOLDER_NAME}/lablog
        echo "          --fasta ${REF_FASTA} \\\\" >> ${FOLDER_NAME}/lablog
        echo "          --gff ${REF_GFF} \\\\" >> ${FOLDER_NAME}/lablog
        echo "          -resume" >> ${FOLDER_NAME}/lablog
        printf "EOF\n\n" >> ${FOLDER_NAME}/lablog
        printf "echo 'sbatch ${ref}_viralrecon.sbatch' > _01_run_${ref}_viralrecon.sh\n\n" >> ${FOLDER_NAME}/lablog
    done
  echo "# conda activate python3" >> ${FOLDER_NAME}/lablog

  cp _02_create_run_percentage_Ns.sh ${FOLDER_NAME}/
  printf 'echo "bash create_summary_report.sh" > _04_create_stats_table.sh\n\n' >> ${FOLDER_NAME}/lablog
  cp create_assembly_stats.R ${FOLDER_NAME}/
  echo "# module load R/4.2.1" >> ${FOLDER_NAME}/lablog
  printf 'echo "Rscript create_assembly_stats.R" > _05_create_stats_assembly.sh\n\n' >> ${FOLDER_NAME}/lablog
  printf 'echo "bash deduplicate_long_table.sh" > _06_deduplicate_long_table.sh\n\n' >> ${FOLDER_NAME}/lablog

    i=$((i+1))
done
rm host_list.tmp
rm create_summary_report.sh
rm deduplicate_long_table.sh
rm percentajeNs.py
rm _02_create_run_percentage_Ns.sh

# Exclusive of genomeev and viral_discovery service, if you see this anywhere else, somebody has copied without reading (https://youtu.be/AgGtGORPHcM?t=4)
mv ANALYSIS03_MAG $(date '+%Y%m%d')_ANALYSIS03_MAG
mv ANALYSIS04_BLAST $(date '+%Y%m%d')_ANALYSIS04_BLAST
