# Decide which viralrecon flavour to set up, based on the reference (column 2)
# of the first line of samples_ref.txt:
#   - NC_045512.2    -> keep the SARS summary script, AMPLICONS analysis.
#   - anything else  -> keep the metagenomic summary script, METAGENOMIC analysis.
# The summary script that is kept is renamed to create_summary_report.sh and the
# other one is deleted. ANALYSIS_TYPE, CONFIG_FILE and PARAMS_FILE are set for
# the rest of the script.
test_ref=$(cut -f2 samples_ref.txt | head -n1)
case "$test_ref" in
    NC_045512.2)
        rm create_summary_report_metagenomic.sh
        mv create_summary_report_sars.sh create_summary_report.sh
        ANALYSIS_TYPE=AMPLICONS
        CONFIG_FILE=../../DOC/viralrecon_sars.config
        PARAMS_FILE=../../DOC/viralrecon_sars_params.yml
        ;;
    *)
        rm create_summary_report_sars.sh
        mv create_summary_report_metagenomic.sh create_summary_report.sh
        ANALYSIS_TYPE=METAGENOMIC
        CONFIG_FILE=../../DOC/viralrecon_metagenomic.config
        PARAMS_FILE=../../DOC/viralrecon_metagenomic_params.yml
        ;;
esac

#######################################
# Print the distinct host names found in column 3 of a tab-separated samples
# file, upper-cased, one per line, sorted.
# Names are upper-cased BEFORE de-duplication so that "human" and "Human"
# yield a single "HUMAN" entry; blank host cells are skipped.
# Arguments: $1 - path to the samples file (sample<TAB>reference<TAB>host)
# Outputs:   host names on stdout
#######################################
list_hosts() {
    cut -f3 -- "$1" | while read -r host; do
        if [[ -n "$host" ]]; then
            printf '%s\n' "${host^^}"
        fi
    done | sort -u
}

# Folder that will hold the symlinks to the raw reads.
mkdir -p 00-reads
# One line per host; consumed by the per-host folder loop further down.
list_hosts samples_ref.txt > host_list.tmp
#######################################
# Print "sample<TAB>reference" for every row of a samples file whose host
# column (3rd, tab-separated) equals the given host, ignoring case.
# The comparison is an exact match on the host column only, so a host name that
# merely appears inside a sample or reference name does not select that row.
# Arguments: $1 - host name, $2 - samples file
#######################################
select_host_samples() {
    awk -F'\t' -v host="$1" '
        function trim(s) { gsub(/^ +| +$/, "", s); return s }
        toupper(trim($3)) == toupper(host) { print $1 "\t" $2 }
    ' "$2"
}

#######################################
# Write the viralrecon samplesheet (sample,fastq_1,fastq_2) for one reference.
# Only rows whose reference column (2nd) is exactly the given reference are
# included; the comparison is forced to be a string comparison.
# Arguments: $1 - reference id, $2 - "sample<TAB>reference" file, $3 - output CSV
#######################################
write_samplesheet() {
    {
        echo "sample,fastq_1,fastq_2"
        awk -F'\t' -v ref="$1" '
            function trim(s) { gsub(/^ +| +$/, "", s); return s }
            (trim($2) "") == (ref "") {
                id = trim($1)
                printf "%s,00-reads/%s_R1.fastq.gz,00-reads/%s_R2.fastq.gz\n", id, id, id
            }
        ' "$2"
    } > "$3"
}

# Print the fixed first lines of a per-host lablog (literal text, no expansion).
lablog_header() {
    cat <<'LABLOG'
ln -s ../00-reads .
ln -s ../samples_id.txt .

#module load Nextflow singularity

scratch_dir=$(echo $PWD | sed "s/\/data\/bi\/scratch_tmp/\/scratch/g")

LABLOG
}

#######################################
# Print the lablog fragment that, when the lablog is executed, writes
# <ref>_viralrecon.sbatch and the _01_run_<ref>_viralrecon.sh launcher.
# "\$scratch_dir" and the doubled backslashes are emitted escaped on purpose:
# the lablog itself contains an unquoted heredoc that expands $scratch_dir and
# turns "\\" into the single line-continuation backslash of the sbatch file.
# Globals:   CONFIG_FILE, PARAMS_FILE (read)
# Arguments: $1 - reference id, $2 - reference fasta, $3 - reference gff,
#            $4 - run date (YYYYMMDD)
#######################################
lablog_ref_job() {
    local ref=$1 ref_fasta=$2 ref_gff=$3 run_date=$4
    cat <<LABLOG
cat <<EOF > ${ref}_viralrecon.sbatch
#!/bin/sh
#SBATCH --ntasks 1
#SBATCH --cpus-per-task 2
#SBATCH --mem 4G
#SBATCH --time 2:00:00
#SBATCH --partition middle_idx
#SBATCH --output ${ref}_${run_date}_viralrecon.log
#SBATCH --chdir \$scratch_dir

export NXF_OPTS="-Xms500M -Xmx4G"

nextflow run /data/bi/pipelines/nf-core-viralrecon-2.5/workflow/main.nf \\\\
          -c ${CONFIG_FILE} \\\\
          -params-file ${PARAMS_FILE} \\\\
          --input samplesheet_${ref}.csv \\\\
          --outdir ${ref}_${run_date}_viralrecon_mapping \\\\
          --fasta ${ref_fasta} \\\\
          --gff ${ref_gff} \\\\
          -resume
EOF

echo 'sbatch ${ref}_viralrecon.sbatch' > _01_run_${ref}_viralrecon.sh

LABLOG
}

# Print the fixed last lines of a per-host lablog (literal text, no expansion).
lablog_footer() {
    cat <<'LABLOG'
#conda activate python3
echo "bash create_summary_report.sh" > _04_create_stats_table.sh

#module load R/4.2.1
echo "Rscript create_assembly_stats.R" > _05_create_stats_assembly.sh

echo "bash deduplicate_long_table.sh" > _06_deduplicate_long_table.sh

LABLOG
}

# Create one analysis folder per host listed in host_list.tmp. Each folder gets
# the helper scripts, its own samples_ref.txt (sample + reference of that host),
# one samplesheet per reference and a lablog that generates the sbatch jobs.
# The date is taken once so every name produced in this run agrees.
run_date=$(date '+%Y%m%d')
i=1
while read -r host; do
    # A blank entry would produce a nameless folder and select nothing useful.
    [[ -n "$host" ]] || continue

    # Two-digit counter: identical to the previous naming for 1-9, and
    # ANALYSIS10 (not ANALYSIS010) from the tenth host on.
    FOLDER_NAME=$(printf '%s_ANALYSIS%02d_%s_%s' "$run_date" "$i" "$ANALYSIS_TYPE" "$host")
    mkdir -p -- "$FOLDER_NAME"
    cp create_summary_report.sh deduplicate_long_table.sh percentajeNs.py \
        _02_create_run_percentage_Ns.sh create_assembly_stats.R "${FOLDER_NAME}/"

    select_host_samples "$host" samples_ref.txt > "${FOLDER_NAME}/samples_ref.txt"

    {
        lablog_header
        while read -r ref; do
            [[ -n "$ref" ]] || continue
            write_samplesheet "$ref" "${FOLDER_NAME}/samples_ref.txt" \
                "${FOLDER_NAME}/samplesheet_${ref}.csv"
            REF_FASTA=$(find /data/bi/references/virus/ -name "${ref}.fasta")
            REF_GFF=$(find /data/bi/references/virus/ -name "${ref}.gff")
            # Diagnostics go to stderr so they never end up inside the lablog.
            [[ -n "$REF_FASTA" ]] \
                || printf 'WARNING: no %s.fasta found under /data/bi/references/virus/\n' "$ref" >&2
            [[ -n "$REF_GFF" ]] \
                || printf 'WARNING: no %s.gff found under /data/bi/references/virus/\n' "$ref" >&2
            lablog_ref_job "$ref" "$REF_FASTA" "$REF_GFF" "$run_date"
        done < <(cut -f2 "${FOLDER_NAME}/samples_ref.txt" | sort -u)
        lablog_footer
    } > "${FOLDER_NAME}/lablog"

    i=$((i+1))
done < host_list.tmp
# Remove the temporary host list and the template scripts from this directory
# (the per-host folders received their copies in the loop above).
for leftover in \
    host_list.tmp \
    create_summary_report.sh \
    deduplicate_long_table.sh \
    percentajeNs.py \
    _02_create_run_percentage_Ns.sh
do
    rm "$leftover"
done
#######################################
# Create <id>_R1.fastq.gz and <id>_R2.fastq.gz symlinks inside a reads folder,
# pointing at ../../RAW/<id>_*R1*.fastq.gz and ../../RAW/<id>_*R2*.fastq.gz
# (paths relative to the reads folder).
# Runs in a subshell so the caller's working directory is never changed, and
# stops before creating any link if the reads folder cannot be entered.
# Arguments: $1 - reads folder, $2 - file with one sample id per line
# Returns:   non-zero if the folder or the id file cannot be used
#######################################
link_reads() {
    (
        cd "$1" || exit 1
        while read -r sample_id; do
            [[ -n "$sample_id" ]] || continue
            for mate in R1 R2; do
                # The glob is left unquoted on purpose so the shell resolves
                # the real file name in RAW.
                ln -s ../../RAW/"${sample_id}"_*"${mate}"*.fastq.gz \
                    "${sample_id}_${mate}.fastq.gz"
            done
        done
    ) < "$2"
}

link_reads 00-reads samples_id.txt
# Derive two values from the working directory path split on "/":
# CURRENT_DIR is the path truncated to its first seven fields and SERVICE_ID
# is the seventh field on its own.
# NOTE(review): this presumably assumes the service folder sits at that depth
# of the path - confirm against the directory layout in use.
CURRENT_DIR=$(cut -d '/' -f-7 <<< "$(pwd)")
SERVICE_ID=$(cut -d '/' -f7 <<< "$(pwd)")

# Helper script that copies CURRENT_DIR to /scratch/bi with rsync via srun.
printf 'srun --partition short_idx rsync -rlv %s /scratch/bi\n' "${CURRENT_DIR}" > _01_copy_folder.sh

# Stamp the MAG analysis template folder with today's date.
mv DATE_ANALYSIS0X_MAG "$(date '+%Y%m%d')_ANALYSIS0X_MAG"

# Helper script that copies the results back, leaving out "work".
printf 'srun --partition short_idx rsync -rlv --exclude "work" /scratch/bi/%s/* %s/\n' \
    "${SERVICE_ID}" "${CURRENT_DIR}" > _02_copy_back.sh
