#Delivery folder: <YYYYMMDD>_entrega01. Everything below runs from inside it.
#The date is evaluated only once so that mkdir and cd always agree on the name,
#even if the script happens to run across midnight.
delivery_dir="$(date '+%Y%m%d')_entrega01"
mkdir -p "${delivery_dir}"
#Stop here if the folder cannot be entered: every later step uses paths
#relative to it and would otherwise write into the wrong place.
cd "${delivery_dir}" || exit 1

#Bring the converter next to the files it will process
mv ../excel_generator.py ./

#One output folder per kind of result delivered
for analysis_dir in mapping_consensus variants_annot assembly_spades abacas_assembly ref_samples; do
    mkdir ${analysis_dir}
done

#Setting up folder and files required for excel_generator.py
#prepare_excel_inputs: reads ../../ANALYSIS/*/samples_ref.txt (tab-separated,
#column 1 = sample, column 2 = reference) and creates in the current folder:
#  - references.tmp                 sorted, unique list of references
#  - ref_samples/samples_<ref>.tmp  samples whose reference is exactly <ref>
#  - excel_files_<ref>/pangolin and excel_files_<ref>/nextclade  symbolic links
#  - <ref>_variants_long_table.csv  symbolic link to the variants table
#Takes no arguments. ref_samples/ must already exist.
#Links whose glob matches nothing are still created (broken); they are
#removed later by _02_clean_folders.sh.
prepare_excel_inputs() {
    local ref

    cat ../../ANALYSIS/*/samples_ref.txt | cut -f2 | sort -u > references.tmp

    while read -r ref; do
        #Blank entries would only produce "samples_.tmp" / "excel_files_" artefacts
        [ -n "${ref}" ] || continue

        #Compare the whole reference column: a plain grep would also pick up
        #samples of any other reference whose name merely contains this one
        awk -F'\t' -v ref="${ref}" '$2 == ref { print $1 }' \
            ../../ANALYSIS/*/samples_ref.txt > "ref_samples/samples_${ref}.tmp"

        mkdir "excel_files_${ref}"
        #Subshell: the relative link targets only resolve from inside the
        #folder, and a failed cd must not leave the script in another directory
        (
            cd "excel_files_${ref}" || exit 1
            ln -s ../../../ANALYSIS/*/*"${ref}"*/variants/ivar/consensus/bcftools/pangolin pangolin
            ln -s ../../../ANALYSIS/*/*"${ref}"*/variants/ivar/consensus/bcftools/nextclade nextclade
        )

        #Create symbolic links to files that are going to be converted to excel
        ln -s ../../ANALYSIS/*/*"${ref}"*/variants/ivar/variants_long_table.csv "${ref}_variants_long_table.csv"
    done < references.tmp
}

prepare_excel_inputs

#link_multiqc_reports: for every reference (column 2) and upper-cased organism
#(column 3) listed in ../../ANALYSIS/samples_ref.txt, link the matching multiqc
#report into the current folder as multiqc_report_<ref>_<ORGANISM>.html.
#Takes no arguments; prints one message per combination that cannot be linked.
#NOTE(review): this reads ANALYSIS/samples_ref.txt while the rest of the script
#reads ANALYSIS/*/samples_ref.txt - confirm which location is intended.
link_multiqc_reports() {
    local ref organism organisms
    local -a reports

    #The organism list does not depend on the reference: compute it once
    organisms=$(cut -f3 ../../ANALYSIS/samples_ref.txt | tr '[:lower:]' '[:upper:]' | sort | uniq)

    for ref in $(cut -f2 ../../ANALYSIS/samples_ref.txt | sort | uniq); do
        for organism in ${organisms}; do
            #Expand the glob into an array first: testing an unquoted glob
            #with [ -e ] fails as soon as it matches more than one file
            reports=( ../../ANALYSIS/*_"${organism}"/"${ref}"_*_viralrecon_mapping/multiqc/multiqc_report.html )
            if [ "${#reports[@]}" -gt 1 ]; then
                echo "Multiple multiqc reports found for reference $ref and organism $organism, none linked: ${reports[*]}"
            elif [ -e "${reports[0]}" ]; then
                ln -s "${reports[0]}" "./multiqc_report_${ref}_${organism}.html"
            else
                echo "Multiqc report not found for reference $ref and organism $organism"
            fi
        done
    done
}

link_multiqc_reports
#Summary tables, linked under fixed names
ln -s ../../ANALYSIS/*/mapping_illumina*.tab ./mapping_illumina.csv
ln -s ../../ANALYSIS/*/assembly_stats.csv ./assembly_stats.csv
ln -s ../../ANALYSIS/*/01-PikaVirus-results/all_samples_virus_table_filtered.tsv ./pikavirus_table.tsv

#Helper script that runs the excel conversion over every reference
echo "python ./excel_generator.py -r ./references.tmp --merge_lineage_files" > _01_generate_excel_files.sh

#Cleaning temp files and broken symbolic links
#The delimiter is quoted so the text is written verbatim: ${dir} and ${in} are
#expanded when _02_clean_folders.sh runs, not now.
#The final mkdir/mv lines used to be echoed to the terminal instead of being
#appended to the script; they are now part of it.
cat > _02_clean_folders.sh << 'EOF'
find . -xtype l -delete
for dir in */; do find ${dir} -xtype l -delete; done
find . -type d -empty -delete
cat references.tmp | while read in; do cp excel_files_${in}/*.xlsx ./ ;done
cat references.tmp | while read in; do rm -rf excel_files_${in}; done
cat references.tmp | while read in; do rm ${in}_variants_long_table.xlsx; done
rm references.tmp
rm -rf ref_samples/
rm ./*.csv
mkdir excel_files
mv *.xlsx excel_files/
EOF

#Create symbolic links to results for every process of the pipeline

#link_sample_results DEST SUBDIR SUFFIX
#  For every line of ../../ANALYSIS/*/samples_ref.txt (first word = sample,
#  second word = reference) create, inside DEST, the link
#  <sample>_<reference><SUFFIX> pointing to
#  ../../../ANALYSIS/*/*<reference>*/<SUBDIR>/<sample><SUFFIX>.
#  Runs in a subshell, so the caller's working directory is never changed;
#  previously a failed "cd DEST" followed by "cd -" moved the rest of the
#  script to another directory. Returns non-zero if DEST cannot be entered.
link_sample_results() {
    local dest=$1 subdir=$2 suffix=$3
    (
        cd "${dest}" || exit 1
        cat ../../../ANALYSIS/*/samples_ref.txt | while read -r sample ref _; do
            #Ignore blank lines
            [ -n "${sample}" ] || continue
            ln -s ../../../ANALYSIS/*/*"${ref}"*/"${subdir}"/"${sample}${suffix}" "./${sample}_${ref}${suffix}"
        done
    )
}

link_sample_results mapping_consensus variants/ivar/consensus/bcftools .consensus.fa
link_sample_results variants_annot variants/ivar/snpeff .snpsift.txt
#Scaffolds are copied (not linked) because they are decompressed in place
(
    cd assembly_spades || exit 1
    rsync -rlv ../../../ANALYSIS/*/*/assembly/spades/rnaviral/*.scaffolds.fa.gz .
    gunzip *.scaffolds.fa.gz
)
link_sample_results abacas_assembly assembly/spades/rnaviral/abacas .abacas.fasta
ln -s ../../ANALYSIS/*/all_samples_filtered_BLAST_results.xlsx .

#Auxiliary script, useful for multiple hosts when sample names contain a pattern that distinguishes them: e.g. HOSTPATTERN1 = 'MONKEY'

#cat ../viralrecon_results | sed 's@../../@../../../@g' | sed 's/entrega01/HOSTPATTERN1_results/g' > results_wgs
#sed -i 's@ANALYSIS/\*/@ANALYSIS/*HOSTPATTERN1*/@g' results_wgs.sh
#sed 's/HOSTPATTERN1/HOSTPATTERN2/g' results_wgs > results_gf.sh
