#!/usr/bin/env python
# Time-stamp: <2016-04-04 11:22:53 Yingxiang Li>

##----PACKAGE------##
import argparse, time, sys, os, commands
from argparse import RawTextHelpFormatter
from pkg_resources import resource_filename

##----FUNCTION-----##
#--common--
def fun_MakeDir(Path_):
	Path_ = Path_.strip().rstrip("\\")
	if not os.path.exists(Path_):
		os.makedirs(Path_)
	return Path_

def fun_Write(File_, File_D_):
	Out_ = open(File_D_, 'w')
	Out_.writelines(File_)
	Out_.close()

def fun_BashOrder(Log_F_, OrderName_, Order_):
	Bash_D_ = fun_MakeDir(Log_F_) + OrderName_ + '.sh'
	Bash_O_ = 'bash ' + Bash_D_ + ' > ' + Log_F_ + OrderName_ + '.log 2>&1'
	fun_Write(Order_, Bash_D_)
	os.system(Bash_O_)

def fun_Decor(Title_, Width_ = 48, DecorType_ = ' ', ShowTime_ = 1, ShowDate_ = 0):
	if ShowTime_ == 1:
		if ShowDate_ == 0:
			Decor_ = '\t|' + Title_ + DecorType_*(Width_ - 2 - len(Title_) - 11) + ' @ ' + time.strftime("%X", time.localtime()) + '|'
		else:
			Decor_ = '\t|' + Title_ + DecorType_*(Width_ - 2 - len(Title_) - 22) + ' @ ' + time.strftime("%Y-%m-%d %X", time.localtime()) + '|'
	else:
		Decor_ = '\t|' + Title_ + DecorType_*(Width_ - 2 - len(Title_)) + '|'
	print Decor_

def fun_ProceTime(FunctName_, Finis_ = 0, Width_ = 48):
	if Finis_ == 0:
		fun_Decor(FunctName_ + '  running', Width_, ' ')
	else:
		fun_Decor(FunctName_ + ' finished', Width_, ' ')

#--special-
#---bwa: index--
def Fun_BwaIndex(Log_F_, Refer_D_):
	fun_ProceTime('bwa index')
	BwaIndex_O_ = 'bwa index -a bwtsw ' + Refer_D_
	ReferName_ = os.path.basename(os.path.splitext(Refer_D_)[0])
	fun_BashOrder(Log_F_, 'Bwa_Index.' + ReferName_, BwaIndex_O_)
	fun_ProceTime('bwa index', 1)

#---FastQC: quality control--
def Fun_FastQC(Outpu_F_, Log_F_, Fastq1_D_, Fastq2_D_, FastQC_):
	if FastQC_:	
		fun_ProceTime('fastqc')
		FastQC_O_ = 'fastqc -q --extract -o ' + fun_MakeDir(Outpu_F_ + 'FastQC/') + ' ' + Fastq1_D_ + ' ' + Fastq2_D_
		fun_BashOrder(Log_F_, 'FastQC_QualiyControl', FastQC_O_)
		fun_ProceTime('fastqc', 1)

#---bwa: map--
def Fun_BwaMap(Outpu_F_, Log_F_, Fastq1_D_, Fastq2_D_, Name_, Refer_D_):
	fun_ProceTime('bwa map')
	ReferName_ = os.path.basename(os.path.splitext(Refer_D_)[0])
	SinglOrPair_ = ['Pa', 'Si'][Fastq2_D_ == '']
	BwaMap_D_ = fun_MakeDir(Outpu_F_ + 'Bwa/') + Name_ + '.sam'
	BwaMap_O_ = '''bwa mem -t 10 -R "@RG\\tID:''' + Name_ + '.Bwa_Map' + SinglOrPair_ + '\\tLB:bwa\\tPL:NA\\tSM:' + Name_ + '\" ' + Refer_D_ + ' ' + Fastq1_D_ + ' ' + Fastq2_D_ + ' > '+ BwaMap_D_
	fun_BashOrder(Log_F_, 'Bwa_Map', BwaMap_O_)	
	fun_ProceTime('bwa map', 1)
	
#---samtools: sam to bam--
def Fun_SamToolsSamToBam(Outpu_F_, Log_F_, Name_):
	fun_ProceTime('samtools sam to bam')
	BwaMap_D_ = Outpu_F_ + 'Bwa/' + Name_ + '.sam'
	Bam_D_ = fun_MakeDir(Outpu_F_ + 'SamTools/') + Name_ + '.bam'
	SamToolsSamToBam_O_ = 'samtools view -bhS ' + BwaMap_D_ + ' -o ' + Bam_D_
	fun_BashOrder(Log_F_, 'SamTools_SamToBam', SamToolsSamToBam_O_)
	fun_ProceTime('samtools sam to bam', 1)

#---samtools: sort--
def Fun_SamToolsSort(Outpu_F_, Log_F_, Name_):
	fun_ProceTime('samtools sort')
	Bam_D_ = Outpu_F_ + 'SamTools/' + Name_ + '.bam'
	BamSortName_ = Outpu_F_ + 'SamTools/' + Name_ + '.Sort'
	SamToolsSort_O_ = 'samtools sort ' + Bam_D_ + ' ' + BamSortName_
	fun_BashOrder(Log_F_, 'SamTools_Sort', SamToolsSort_O_)
	fun_ProceTime('samtools sort', 1)

#---samtools: index--
def Fun_SamToolsIndex(Outpu_F_, Log_F_, Name_):
	fun_ProceTime('samtools index')
	BamSort_D_ = Outpu_F_ + 'SamTools/' + Name_ + '.Sort.bam'
	SamToolsIndex_O_ = 'samtools index ' + BamSort_D_
	fun_BashOrder(Log_F_, 'SamTools_Index', SamToolsIndex_O_)
	fun_ProceTime('samtools index', 1)

#---samtools: multi pile up--
def Fun_SamToolsMultiPileUp(Outpu_F_, Log_F_, Name_, Refer_D_, Unlim_):
	fun_ProceTime('samtools mpileup')
	BamSort_D_ = Outpu_F_ + 'SamTools/' + Name_ + '.Sort.bam'
	MultiPileUp_D_ = Outpu_F_ + 'SamTools/' + Name_ + '.mpu'
	SamToolsMultiPileUp_O_ = 'SamTools mpileup' + ['', ' -d10000000'][Unlim_] + ' -f ' + Refer_D_ + ' ' + BamSort_D_ + ' > ' + MultiPileUp_D_
	fun_BashOrder(Log_F_, 'SamTools_MultiplePileUp', SamToolsMultiPileUp_O_)
	fun_ProceTime('samtools mpileup', 1)

#---varscan: indel--
def Fun_VarScanIndel(Outpu_F_, Log_F_, Name_, VarScan_, BaseQuali_, VarFreq_, PValue_):
	fun_ProceTime('varscan indel')
	MultiPileUp_D_ = Outpu_F_ + 'SamTools/' + Name_ + '.mpu'
	VarScanIndel_D_ = fun_MakeDir(Outpu_F_ + 'VarScan/') + Name_ + '.indel.tab'
	VarScanIndel_O_ = 'java -jar ' + VarScan_ + ' pileup2indel ' + MultiPileUp_D_ + ' --min-avg-qual ' + BaseQuali_  + ' --min-var-freq ' + VarFreq_+ ' --p-value ' + PValue_ + ' > ' + VarScanIndel_D_
	fun_BashOrder(Log_F_, 'VarScan_Indel', VarScanIndel_O_)
	fun_ProceTime('varscan indel', 1)

#---varscan: read count--
def Fun_VarScanReadCount(Outpu_F_, Log_F_, Name_, VarScan_, BaseQuali_, ReadCount_):
	if ReadCount_:
		fun_ProceTime('varscan read count')
		MultiPileUp_D_ = Outpu_F_ + 'SamTools/' + Name_ + '.mpu'
		VarScanReadCount_D_ = Outpu_F_ + 'VarScan/' + Name_ + '.read.tab'
		VarScanReadCount_O_ = 'java -jar ' + VarScan_ + ' readcounts ' + MultiPileUp_D_ +  ' --min-avg-qual ' + BaseQuali_ + ' > ' + VarScanReadCount_D_
		fun_BashOrder(Log_F_, 'VarScan_ReadCount', VarScanReadCount_O_)
		fun_ProceTime('varscan read count', 1)

#---varscan: consensus call--
def Fun_VarScanConseCall(Outpu_F_, Log_F_, Name_, VarScan_, BaseQuali_, PValue_, Conse_):
	if Conse_:
		fun_ProceTime('varscan consensus call')
		MultiPileUp_D_ = Outpu_F_ + 'SamTools/' + Name_ + '.mpu'
		VarScanConsenCall_D_ = Outpu_F_ + 'VarScan/' + Name_ + '.consensus.tab'
		VarScanConsenCall_O_ = 'java -jar ' + VarScan_ + ' pileup2cns ' + MultiPileUp_D_ +  ' --min-avg-qual ' + BaseQuali_ + ' --p-value ' + PValue_ + ' > ' + VarScanConsenCall_D_
		fun_BashOrder(Log_F_, 'VarScan_ConseCall', VarScanConsenCall_O_)
		fun_ProceTime('varscan consensus call', 1)

#---varscan: snp--
def Fun_VarScanSnp(Outpu_F_, Log_F_, Name_, VarScan_, BaseQuali_, VarFreq_, PValue_, Snp_):
	if Snp_:
		fun_ProceTime('varscan snp')
		MultiPileUp_D_ = Outpu_F_ + 'SamTools/' + Name_ + '.mpu'
		VarScanSnp_D_ = Outpu_F_ + 'VarScan/' + Name_ + '.snp.tab'
		VarScanSnp_O_ = 'java -jar ' + VarScan_ + ' pileup2snp ' + MultiPileUp_D_ +  ' --min-avg-qual ' + BaseQuali_  + ' --min-var-freq ' + VarFreq_+ ' --p-value ' + PValue_ + ' > ' + VarScanSnp_D_
		fun_BashOrder(Log_F_, 'VarScan_Snp', VarScanSnp_O_)
		fun_ProceTime('varscan snp', 1)


##----MAIN---------##
def main():
	Tool = 'CSIA'
	Versi = '1.11'
	Author = 'Yingxiang Li'
	Email = 'xlccalyx@gmail.com'
	Date = 'Apr 02, 2016'
	UpdateDate = '04032016,04042016'
	Home = 'www.calyx.biz'
	FullName = 'CRISPR Sequence Indel Analysis'
	Aim = 'This program is for analysis of indel of CRISPR sequence.'

	Parse_ = argparse.ArgumentParser(description = '\tTool:   ' + Tool + 'v' + Versi + '\n\tDate:   ' + Date + '\n\tAuthor: Yingxiang Li (xlccalyx@gmail.com)\n\tHome:   ' + Home + '\n\tMust-install (NOT guaranteed on other versions):\n\t        bwa: 0.7.5a; fastqc: v0.11.2; samtools: 0.1.19; java: 1.7.0_95', formatter_class = RawTextHelpFormatter)

	Parse_.add_argument('-R', '--reference', help = "reference direction, fasta format. (eg: my_ref.fa)", required = True)
	Parse_.add_argument('-D', '--data', help = "one sample's sequenced fastq folder, ONLY fastq in. one file for single end, two files for paired end. (eg: my_data/)", required = True)
	Parse_.add_argument('-O', '--output', help = "output folder for all result and recording. if not exists, will be created. (eg: my_output/)", required = True)

	Parse_.add_argument('-N', '--name', help = "sample name, default is name of output folder. (eg: my_sample)", default = 'NoName')
	Parse_.add_argument('-P', '--pvalue', help = "minimal p value of indels, default: 0.05.", default = '0.05')
	Parse_.add_argument('-B', '--basequality', help = "minimal base quality, default: 30.", default = '30')
	Parse_.add_argument('-V', '--varfreq', help = "minimal indel frequency, default: 0.0001.", default = '0.0001')
	Parse_.add_argument('-F', '--fastqc', help = "run fastqc to process quality control, default activated. -F will disable.", action = "store_false", default = True)
	Parse_.add_argument('-U', '--unlimited', help = "read depth is unlimited when checking indels, default: disable.", action = "store_true")
	Parse_.add_argument('-E', '--readcount', help = "collect read counts of sequence, default: disable.", action = "store_true")
	Parse_.add_argument('-C', '--consensus', help = "return consensus turn, default: disable.", action = "store_true")
	Parse_.add_argument('-S', '--snp', help = "look for SNP, default: disable.", action = "store_true")

	Args_ = Parse_.parse_args()

	Refer_D_ = Args_.reference
	Data_F_ = Args_.data
	Outpu_F_ = Args_.output

	Name_ = Args_.name
	PValue_ = Args_.pvalue
	BaseQuali_ = Args_.basequality
	VarFreq_ = Args_.varfreq
	FastQC_ = Args_.fastqc
	Unlim_ = Args_.unlimited
	ReadCount_ = Args_.readcount
	Conse_ = Args_.consensus
	Snp_ = Args_.snp

	fun_Decor('', 48, '-', 0, 0)
	fun_Decor('Tool:   ' + Tool + 'v' + Versi, 48, ' ', 0, 0)
	fun_Decor('Author: ' + Author + ' (' + Email + ')', 48, ' ', 0, 0)
	fun_Decor('> BEGIN', 48, ' ', 1, 1)
	if not os.path.isfile(Refer_D_):
		print 'Reference doesn\'t exist!'
	elif len(os.listdir(Data_F_)) == 0:
		print 'Data doesn\' exist!'
	else:
		if Name_ == 'NoName':
			Name_ = [x_ for x_ in Outpu_F_.split('/') if x_ != ''][-1]

		Outpu_F_ = fun_MakeDir(Outpu_F_ + '/') 
		Log_F_ = fun_MakeDir(Outpu_F_ + '/Log/')
		#VarScan_ = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'VarScan/VarScan.v2.3.9.jar')
		VarScan_ = resource_filename('CSIA', 'VarScan.v2.3.9.jar')
		#--bwa: index--
		Fun_BwaIndex(Log_F_, Refer_D_)
		#--fastq files--
		Fastq1_D_ = Data_F_ + sorted(os.listdir(Data_F_))[0]
		Fastq2_D_ = '' if len(os.listdir(Data_F_)) <= 1 else Data_F_ + sorted(os.listdir(Data_F_))[1]
		#--fastqc: quality control--
		Fun_FastQC(Outpu_F_, Log_F_, Fastq1_D_, Fastq2_D_, FastQC_)
		#--bwa: map--
		Fun_BwaMap(Outpu_F_, Log_F_, Fastq1_D_, Fastq2_D_, Name_, Refer_D_)		
		#--samtools: sam to bam--
		Fun_SamToolsSamToBam(Outpu_F_, Log_F_, Name_)
		#--samtools: sort
		Fun_SamToolsSort(Outpu_F_, Log_F_, Name_)
		#--samtools: index
		Fun_SamToolsIndex(Outpu_F_, Log_F_, Name_)
		#--samtools: multi pile up--
		Fun_SamToolsMultiPileUp(Outpu_F_, Log_F_, Refer_D_, Name_, Unlim_)
		#--varscan: indel
		Fun_VarScanIndel(Outpu_F_, Log_F_, Name_, VarScan_, BaseQuali_, VarFreq_, PValue_)
		#--varscan: read count--
		Fun_VarScanReadCount(Outpu_F_, Log_F_, Name_, VarScan_, BaseQuali_, ReadCount_)
		#--varscan: consensus call--
		Fun_VarScanConseCall(Outpu_F_, Log_F_, Name_, VarScan_, BaseQuali_, PValue_, Conse_)
		#--varscan: snp--
		Fun_VarScanSnp(Outpu_F_, Log_F_, Name_, VarScan_, BaseQuali_, VarFreq_, PValue_, Snp_)

##----CODA---------##
		print '''\t|~~~~~~~~~~~~~~~~~www.calyx.biz~~~~~~~~~~~~~~~~|
\t|                     __.                      |
\t|      ___.  ____.   |  |  __. __.__.   __.    |
\t|    _/ ___\ \__  \  |  | <   y  |\  \ /  /    |
\t|    \  c___  /  a \_|  l__\___  | >  x  <     |
\t|     \_____>(______/|____//_____|/__/ \__\\    |
\t|~~~~~~~~~~~~~~~~~www.calyx.biz~~~~~~~~~~~~~~~~|'''
		fun_Decor('> END', 48, ' ', 1, 1)
		fun_Decor('', 48, '-', 0, 0)

##----PROCESS------##
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        sys.stderr.write("User interrupted me! ;-) Bye!\n")
        sys.exit(0)

##----TEST--------##
