#!/usr/bin/env python
# CharGer - Characterization of Germline variants
# author: Adam D Scott (adamscott@wustl.edu)
# author: Kuan-lin Huang (kuan-lin.huang@wustl.edu)
# author: Amila Weerasinghe (amila@wustl.edu)
# author: R Jay Mashl (rmashl@wustl.edu)
# version: v0.5.0 - 2018*01

import sys
import getopt
from charger import charger
import time
import argparse

def parseArgs( argv ):
	"""Parse CharGer command-line arguments into a settings dictionary.

	Parameters:
		argv: list of argument strings, without the program name
			(i.e. sys.argv[1:]).

	Returns:
		dict keyed by setting name (e.g. "maf", "vcf", "tsv", "output",
		"scoresMap", "bHasUserScores", the "*Column" indices, thresholds
		and local VEP/ExAC/ClinVar paths). "scoresMap" maps every module
		name and category-threshold name to its integer score.

	Raises:
		SystemExit: status 2 when the arguments cannot be parsed, no
			flagged option is given, no input file is given, or both HTML
			output and input annotation are requested; status 0 after
			printing the help text for -h/--help.
	"""
	helpText = "\nCharGer - v0.5.0\n\n"
	helpText += "Usage: "
	helpText += "charger <input file> [options]\n\n"
	helpText += "Accepted input data files:\n"
	helpText += "  -m Standard .maf\n"
	helpText += "  -f Standard .vcf\n"
	helpText += "  -T Custom .tsv\n"
	helpText += "Output:\n"
	helpText += "  -o output file\n"
	helpText += "  -w output as HTML (flag)\n"
	helpText += "  -k annotate input (flag)\n"
	helpText += "  --run-url-test test url when creating links\n"
	helpText += "  --include-vcf-details (flag)\n"
	helpText += "Access data:\n"
	helpText += "  -l ClinVar (flag)\n"
	helpText += "  -x ExAC (flag)\n"
	helpText += "  -E VEP (flag)\n"
	helpText += "  -t TCGA cancer types (flag)\n"
	helpText += "Suppress data or overrides:\n"
	helpText += "  -O override with ClinVar description (flag)\n"
	helpText += "  -D suppress needing disease specific (flag)\n"
	helpText += "Cross-reference data files:\n"
	helpText += "  -z pathogenic variants, .vcf\n"
	helpText += "  -e expression matrix file, .tsv\n"
	helpText += "  --inheritanceGeneList inheritance gene list file, (format: gene\\tdisease\\tmode_of_inheritance) .txt\n"
	helpText += "  --PP2GeneList PP2 gene list file, (format: column of genes) .txt\n"
	helpText += "  --BP1GeneList BP1 gene list file, (format: column of genes) .txt\n"
	helpText += "  -d diseases file, (format: gene\\tdisease\\tmode_of_inheritance) .tsv\n"
	helpText += "  -n de novo file, standard .maf\n"
	helpText += "  -a assumed de novo file, standard .maf\n"
	helpText += "  -c co-segregation file, standard .maf\n"
	helpText += "  -H HotSpot3D clusters file, .clusters\n"
	helpText += "Thresholds:\n"
	helpText += "  --recurrence-threshold HotSpot3D recurrence threshold (default = 2)\n"
	helpText += "  --rare-threshold Allele frequency threshold for rare (default = 0.0005 (0.05%)):\n"
	helpText += "  --common-threshold Allele frequency threshold for common (default = 0.05 (5%)):\n"
	helpText += "Pathogenicity/benignity standard module scores:\n"
	helpText += "  --PVS1 very strong pathogenicity (default = 8)\n"
	helpText += "  --PS1 , --PS2 , --PS3 , --PS4 strong pathogenicity (defaults: PS1 = 7, PS2=PS3=PS4 = 4)\n"
	helpText += "  --PM1 , --PM2 , --PM3 , --PM4 , --PM5 , --PM6 moderate pathogenicity (defaults: PM1=PM2=PM3=PM4=PM5=PM6 = 2)\n"
	helpText += "  --PP1 , --PP2 , --PP3 , --PP4 , --PP5 supporting pathogenicity (defaults: PP1=PP2=PP3=PP4=PP5 = 1)\n"
	helpText += "  --BP1 , --BP2 , --BP3 , --BP4 , --BP5 , --BP6 , --BP7 supporting benignity (defaults: BP1=BP2=BP3=BP4=BP5=BP6=BP7 = -1)\n"
	helpText += "  --BS1 , --BS2 , --BS3 , --BS4 strong benignity (defaults: BS1=BS2=BS3=BS4 = -4)\n"
	helpText += "  --BA1 stand-alone benignity (defaults: BA1 = -8)\n"
	helpText += "Pathogenicity/benignity CharGer-defined module scores:\n"
	helpText += "  --PSC1 strong pathogenicity (defaults: PSC1 = 4)\n"
	helpText += "  --PMC1 moderate pathogenicity (defaults: PMC1 = 2)\n"
	helpText += "  --PPC1 , --PPC2 supporting pathogenicity (defaults: PPC1=PPC2 = 1)\n"
	helpText += "  --BMC1 moderate benignity (defaults: BMC1 = -2)\n"
	helpText += "  --BSC1 strong benignity (defaults: BSC1 = -6)\n"
	helpText += "Pathogenicity/benignity CharGer category thresholds:\n"
	helpText += "  --min-pathogenic-score threshold for classifying variant as pathogenic (default = 9)\n"
	helpText += "  --min-likely-pathogenic-score threshold for classifying variant as likely pathogenic (default = 5)\n"
	helpText += "  --max-likely-benign-score threshold for classifying variant as likely benign (default = -4)\n"
	helpText += "  --max-benign-score threshold for classifying variant as benign (default = -8)\n"
	helpText += "Local VEP (works with .vcf input only; suppresses ReST too):\n"
	helpText += "  --perl Path to Perl\n"
	helpText += "  --vep-script Path to VEP\n"
	helpText += "  --vep-config config-file for VEP\n"
	#helpText += "  --vep-dir Path to VEP directory\n"
	helpText += "  --vep-cache Path to VEP cache directory\n"
	helpText += "  --vep-version VEP version (default = 87)\n"
	helpText += "  --vep-output VEP output file (default = charger.vep.vcf)\n"
	helpText += "  --grch assembly GRCh version (default = 37)\n"
	helpText += "  --ensembl-release Ensembl release version (default = 75)\n"
	helpText += "  --reference-fasta VEP reference fasta\n"
	helpText += "  --fork Number of forked processes used in VEP (default = 1)\n"
	helpText += "Local databases (suppresses ReST too):\n"
	helpText += "  --exac-vcf ExAC vcf.gz\n"
	#helpText += "  --clinvar-tsv ClinVar (.tsv.gz download)\n"
	#helpText += "  --clinvar-vcf ClinVar (.vcf.gz download)\n"
	helpText += "  --mac-clinvar-tsv ClinVar from MacArthur lab (clinvar_alleles.tsv.gz)\n"
	#helpText += "  --mac-clinvar-vcf ClinVar from MacArthur lab (clinvar_alleles.vcf.gz)\n"
	helpText += "Filters:\n"
	helpText += "  --frequency-filter Keep if allele frequency lower (default = 1, process variant with any frequency):\n"
	helpText += "  --vcf-any-filter Keep variants that do not pass all filters in .vcf input (flag)\n"
	helpText += "  --mutation-types Keep types, as a comma delimited list (no spaces)\n"
	helpText += "ReST batch sizes:\n"
	helpText += "  -v VEP (#variants, default/max allowed = 300)\n"
	helpText += "  -b ClinVar summary (#variants, default/max allowed = 500)\n"
	helpText += "  -B ClinVar searchsize (#variants, default/max allowed = 50)\n"
	helpText += "Custom columns (0-based)\n"
	helpText += "  -G HUGO gene symbol\n"
	helpText += "  -X chromosome\n"
	helpText += "  -S start position\n"
	helpText += "  -P stop position\n"
	helpText += "  -R reference allele\n"
	helpText += "  -A alternate allele\n"
	helpText += "  -s strand\n"
	helpText += "  -M sample name\n"
	helpText += "  -C codon\n"
	helpText += "  -p peptide change\n"
	helpText += "  -L variant classification\n"
	helpText += "  -F allele frequency\n"
	helpText += "\n"
	helpText += "  -h this message\n"
	helpText += "\n"

	mafFile = ""
	vcfFile = ""
	tsvFile = ""
	expressionFile = ""
	inheritanceGeneListFile = ""
	PP2GeneListFile = ""
	BP1GeneListFile = ""
	deNovoFile = ""
	assumedDeNovoFile = ""
	coSegregationFile = ""
	diseasesFile = ""
	output = "charger_summary.tsv"
	# NOTE(review): the help text quotes 500 and 50 for these two batch
	# sizes; the values below are what is actually used when -b/-B are absent.
	clinvarSummaryBatchSize = 100
	clinvarSearchBatchSize = 100
	vepBatchSize = 300
	chrColumn = 0
	startColumn = 1
	stopColumn = 2
	refColumn = 3
	altColumn = 4
	geneColumn = None #6
	strandColumn = None #11
	codonColumn = None #14
	peptideChangeColumn = None #15
	variantClassificationColumn = None #15
	sampleColumn = None #21
	alleleFrequencyColumn = None #33
	specific = True
	tcga = False
	clinvar = False
	exac = False
	vep = False
	asHTML = False
	override = False
	recurrenceThreshold = 2
	clustersFile = None
	pathogenicVariantsFile = None
	annotateInput = ""
	perl = None
	vepScript = None
	vepConfig = None
	vepCache = None
	vepOutput = None
	ensemblRelease = str( 75 )
	vepVersion = str( 87 )
	grch = str( 37 )
	fork = str( 1 )
	referenceFasta = None
	exacVCF = None
	macClinVarVCF = None
	macClinVarTSV = None
	# NOTE(review): the default is True and --run-url-test switches it off,
	# which reads as inverted next to the help text; behavior kept as-is
	# until the intended default is confirmed.
	doURLTest = True
	rareAF = 0.0005 #from germline studies
	commonAF = 0.05 #from ACMG suggestion
	keepAF = 1
	anyFilter = False
	mutationTypes = []
	includeVCFDetails = False
	pVeryStrong = [ 'PVS1' ]
	pStrong = [ 'PS1' , 'PS2' , 'PS3' , 'PS4' ]
	pModerate = [ 'PM1' , 'PM2' , 'PM3' , 'PM4' , 'PM5' , 'PM6' ]
	pSupport = [ 'PP1' , 'PP2' , 'PP3' , 'PP4' , 'PP5' ]
	bStandAlone = [ 'BA1' ]
	bStrong = [ 'BS1' , 'BS2' , 'BS3' , 'BS4' ]
	bModerate = []
	bSupport = [ 'BP1' , 'BP2' , 'BP3' , 'BP4' , 'BP5' , 'BP6' , 'BP7' ]
	cpStrong = [ 'PSC1' ]
	cpModerate = [ 'PMC1' ]
	cpSupport = [ 'PPC1' , 'PPC2' ]
	cbStrong = [ 'BSC1' ]
	cbModerate = [ 'BMC1' ]
	pModuleList = pVeryStrong + pStrong + pModerate + pSupport
	bModuleList = bStandAlone + bStrong + bModerate + bSupport
	# CharGer-defined modules must be part of the module list so that they
	# receive default scores and their --<module> switches are accepted.
	cModuleList = cpStrong + cpModerate + cpSupport + cbModerate + cbStrong
	moduleList = pModuleList + bModuleList + cModuleList
	scoresMap = {
		'minPathogenicScore'       : 9 ,
		'minLikelyPathogenicScore' : 5 ,
		'maxLikelyBenignScore'     : -4 ,
		'maxBenignScore'           : -8 ,
		}
	userScoreSwitchesMap = {
		"--min-pathogenic-score"        : 'minPathogenicScore',
		"--min-likely-pathogenic-score" : 'minLikelyPathogenicScore' ,
		"--max-benign-score"            : 'maxBenignScore' ,
		"--max-likely-benign-score"     : 'maxLikelyBenignScore' ,
		}
	# Update maps to include modules (defaults as listed in the help text)
	for i in moduleList:
		if i in pVeryStrong:
			scoresMap[ i ] = 8
		elif i == "PS1":
			scoresMap[ i ] = 7
		elif i in pStrong or i in cpStrong:
			scoresMap[ i ] = 4
		elif i in pModerate or i in cpModerate:
			scoresMap[ i ] = 2
		elif i in pSupport or i in cpSupport:
			scoresMap[ i ] = 1
		elif i in bSupport:
			scoresMap[ i ] = -1
		elif i in bModerate or i in cbModerate:
			scoresMap[ i ] = -2
		elif i in cbStrong:
			scoresMap[ i ] = -6
		elif i in bStrong:
			scoresMap[ i ] = -4
		elif i in bStandAlone:
			scoresMap[ i ] = -8
		userScoreSwitchesMap[ "--" + i ] = i
	# Transform switches to getopt long-option names that take a value
	userScoreStrings = [ k[2:] + "=" for k in userScoreSwitchesMap ]
	bHasUserScores = False

	#haven't used ijquy
	charCommands = "DEtlxhwOkX:s:A:R:S:P:M:G:m:f:T:o:v:b:B:p:C:g:d:e:n:a:c:H:z:L:F:r:"
	# Every long name tested in the option loop below must be registered
	# here, otherwise getopt rejects it before the loop ever sees it.
	longCommands = [
		"help" ,
		"maf=" , "vcf=" , "tsv=" , "output=" , "use-tcga" ,
		"run-vep" , "run-clinvar" , "run-exac" ,
		"vepBatchSize=" , "summaryBatchSize=" , "searchBatchSize=" ,
		"chromosome=" , "strand=" , "alt=" , "ref=" ,
		"start=" , "stop=" , "gene=" , "sample=" ,
		"peptideChange=" , "codon=" , "alleleFrequency=" ,
		"variantClassification=" ,
		"inheritanceGeneList=" , "diseases=" , "expression=" ,
		"PP2GeneList=" , "BP1GeneList=" ,
		"deNovo=" , "assumedDeNovo=" , "coSegregation=" ,
		"diseaseSpecific" , "html" , "override" , "annotate" ,
		"rare-threshold=" , "common-threshold=" ,
		"recurrence-threshold=" , "frequency-filter=" ,
		"vcf-any-filter" , "mutation-types=" ,
		"hotspot3d=" , "pathogenicVariants=" ,
		"vep-script=" , "vep-config=", "vep-dir=" , "vep-cache=" , "vep-output=" ,
		"ensembl-release=" , "vep-version=" , "perl=" ,
		"grch=" , "reference-fasta=" , "fork=" ,
		"exac-vcf=" , "mac-clinvar-vcf=" , "mac-clinvar-tsv=" ,
		"run-url-test" , "include-vcf-details" ,
		] + userScoreStrings
	try:
		opts, args = getopt.getopt( argv , charCommands , longCommands )
	except getopt.GetoptError:
		print( "CharGer ERROR: Command not recognized" )
		print( helpText )
		sys.exit(2)
	if not opts:
		print( "CharGer ERROR: Expected flagged input" )
		print( helpText )
		sys.exit(2)
	for opt, arg in opts:
		if opt in ( "-h" , "--help" ):
			print( helpText )
			sys.exit()
		elif opt in ( "-m" , "--maf" ):
			mafFile = arg
			codonColumn = 48
			peptideChangeColumn = 49
		elif opt in ( "-f" , "--vcf" ):
			vcfFile = arg
		elif opt in ( "-T" , "--tsv" ):
			tsvFile = arg
		elif opt in ( "-z" , "--pathogenicVariants" ):
			pathogenicVariantsFile = arg
		elif opt in ( "-o" , "--output" ):
			output = arg
		elif opt in ( "-v" , "--vepBatchSize" ):
			vepBatchSize = int( arg )
		elif opt in ( "-b" , "--summaryBatchSize" ):
			clinvarSummaryBatchSize = int( arg )
		elif opt in ( "-B" , "--searchBatchSize" ):
			clinvarSearchBatchSize = int( arg )
		# all customized .tsv options are in caps for now
		# (column values given on the command line are passed on as strings)
		elif opt in ( "-X" , "--chromosome" ):
			chrColumn = arg
		elif opt in ( "-s" , "--strand" ):
			strandColumn = arg
		elif opt in ( "-A" , "--alt" ):
			altColumn = arg
		elif opt in ( "-R" , "--ref" ):
			refColumn = arg
		elif opt in ( "-S" , "--start" ):
			startColumn = arg
		elif opt in ( "-P" , "--stop" ):
			stopColumn = arg
		elif opt in ( "-G" , "--gene" ):
			geneColumn = arg
		elif opt in ( "-M" , "--sample" ):
			sampleColumn = arg
		elif opt in ( "-C" , "--codon" ):
			codonColumn = arg
		elif opt in ( "-p" , "--peptideChange" ):
			peptideChangeColumn = arg
		elif opt in ( "-L" , "--variantClassification" ):
			variantClassificationColumn = arg
		elif opt in ( "-F" , "--alleleFrequency" ):
			alleleFrequencyColumn = arg
		elif opt in ( "-g" , "--inheritanceGeneList" ):
			inheritanceGeneListFile = arg
		# Single long names are compared with ==; "opt in ( '--x' )" would be
		# a substring test and would let --PP2/--BP1 match the gene-list names.
		elif opt == "--PP2GeneList":
			PP2GeneListFile = arg
		elif opt == "--BP1GeneList":
			BP1GeneListFile = arg
		elif opt in ( "-e" , "--expression" ):
			expressionFile = arg
		elif opt in ( "-n" , "--deNovo" ):
			deNovoFile = arg
		elif opt in ( "-a" , "--assumedDeNovo" ):
			assumedDeNovoFile = arg
		elif opt in ( "-c" , "--coSegregation" ):
			coSegregationFile = arg
		elif opt in ( "-d" , "--diseases" ):
			diseasesFile = arg
		elif opt in ( "-D" , "--diseaseSpecific" ):
			specific = False
		elif opt in ( "-t" , "--use-tcga" ):
			tcga = True
		elif opt in ( "-l" , "--run-clinvar" ):
			clinvar = True
		elif opt in ( "-E" , "--run-vep" ):
			vep = True
		elif opt in ( "-x" , "--run-exac" ):
			exac = True
		elif opt in ( "-w" , "--html" ):
			asHTML = True
		elif opt in ( "-O" , "--override" ):
			override = True
		elif opt in ( "-r" , "--recurrence-threshold" ):
			recurrenceThreshold = float( arg )
		elif opt == "--rare-threshold":
			rareAF = float( arg )
		elif opt == "--common-threshold":
			commonAF = float( arg )
		elif opt == "--frequency-filter":
			keepAF = float( arg )
		elif opt == "--mutation-types":
			mutationTypes = arg.split( "," )
		elif opt == "--vcf-any-filter":
			anyFilter = True
		elif opt in ( "-H" , "--hotspot3d" ):
			clustersFile = arg
		elif opt in ( "-k" , "--annotate" ):
			annotateInput = True
		elif opt in userScoreSwitchesMap:
			scoresMap[ userScoreSwitchesMap[ opt ] ] = int( arg )
			bHasUserScores = True
		elif opt == "--perl":
			perl = arg
		elif opt == "--vep-script":
			vepScript = arg
		elif opt == "--vep-config":
			vepConfig = arg
		#elif opt == "--vep-dir":
		#	vepDir = arg
		elif opt == "--vep-cache":
			vepCache = arg
		elif opt == "--vep-output":
			vepOutput = arg
		elif opt == "--ensembl-release":
			ensemblRelease = arg
		elif opt == "--vep-version":
			vepVersion = arg
		elif opt == "--grch":
			grch = arg
		elif opt == "--fork":
			fork = arg
		elif opt == "--reference-fasta":
			referenceFasta = arg
		elif opt == "--exac-vcf":
			exacVCF = arg
		elif opt == "--mac-clinvar-vcf":
			macClinVarVCF = arg
		elif opt == "--mac-clinvar-tsv":
			macClinVarTSV = arg
		elif opt == "--run-url-test":
			doURLTest = False
		elif opt == "--include-vcf-details":
			includeVCFDetails = True
	if not mafFile and not vcfFile and not tsvFile:
		print( "\nCharGer ERROR: must provide an input file\n" )
		print( helpText )
		# usage errors exit non-zero so calling scripts can detect them
		sys.exit( 2 )
	if annotateInput and asHTML:
		print( "\nCharGer ERROR: cannot output as .html and annotate input file at the same time\n" )
		print( helpText )
		sys.exit( 2 )
	return {
		"maf" : mafFile ,
		"vcf" : vcfFile ,
		"tsv" : tsvFile ,
		"pathogenicVariantsFile" : pathogenicVariantsFile ,
		"output" : output ,
		"specific" : specific ,
		"tcga" : tcga ,
		"clinvar" : clinvar ,
		"vep" : vep ,
		"exac" : exac ,
		"html" : asHTML ,
		"override" : override ,
		"vepBatchSize" : vepBatchSize ,
		"clinvarSummaryBatchSize" : clinvarSummaryBatchSize ,
		"clinvarSearchBatchSize" : clinvarSearchBatchSize ,
		"expression" : expressionFile ,
		"deNovo" : deNovoFile ,
		"assumedDeNovo" : assumedDeNovoFile ,
		"coSegregation" : coSegregationFile ,
		"diseases" : diseasesFile ,
		"inheritanceGeneList" : inheritanceGeneListFile ,
		"PP2GeneList" : PP2GeneListFile ,
		"BP1GeneList" : BP1GeneListFile ,
		"chrColumn" : chrColumn ,
		"strandColumn" : strandColumn ,
		"startColumn" : startColumn ,
		"stopColumn" : stopColumn ,
		"refColumn" : refColumn ,
		"altColumn" : altColumn ,
		"geneColumn" : geneColumn ,
		"sampleColumn" : sampleColumn ,
		"codonColumn" : codonColumn ,
		"peptideChangeColumn" : peptideChangeColumn ,
		"variantClassificationColumn" : variantClassificationColumn ,
		"alleleFrequencyColumn" : alleleFrequencyColumn ,
		"recurrenceThreshold" : recurrenceThreshold ,
		"rareAF" : rareAF ,
		"commonAF" : commonAF ,
		"keepAF" : keepAF ,
		"anyFilter" : anyFilter ,
		"mutationTypes" : mutationTypes ,
		"clustersFile" : clustersFile ,
		"annotate" : annotateInput ,
		"scoresMap" : scoresMap ,
		"bHasUserScores" : bHasUserScores ,
		"perl" : perl ,
		"vepScript" : vepScript ,
		"vepConfig" : vepConfig ,
		#"vepDir" : vepDir ,
		"vepCache" : vepCache ,
		"vepOutput" : vepOutput ,
		"ensemblRelease" : ensemblRelease ,
		"vepVersion" : vepVersion ,
		"grch" : grch ,
		"fork" : fork ,
		"referenceFasta" : referenceFasta ,
		"exacVCF" : exacVCF ,
		"macClinVarTSV" : macClinVarTSV ,
		"macClinVarVCF" : macClinVarVCF ,
		"doURLTest" : doURLTest ,
		"includeVCFDetails" : includeVCFDetails ,
		}

### main ### 
def main( argv ):
	"""Run CharGer end to end for the given command-line arguments.

	Parses argv with parseArgs, reports and validates the score
	thresholds, loads local and external data through a charger object,
	runs every evidence module, classifies under the "ACMG" and "CharGer"
	systems, writes the summary and prints per-stage timings.

	Parameters:
		argv: list of argument strings without the program name.

	Raises:
		SystemExit: status 2 when the category thresholds are inconsistent
			(plus whatever parseArgs raises for bad arguments).
	"""
	# stamps[0..5] correspond to the stage boundaries reported at the end
	stamps = [ time.time() ]
	settings = parseArgs( argv )
	scores = settings["scoresMap"]
	doVEP = settings["vep"]
	doExAC = settings["exac"]
	doClinVar = settings["clinvar"]
	rareAF = settings["rareAF"]
	commonAF = settings["commonAF"]

	if settings["bHasUserScores"]:
		print( "Using user-provided module scores and/or category thresholds:" )
	else:
		print( "Using default module scores and category thresholds:" )
	for name in sorted( scores ):
		print( "   " + name + " = " + str( scores[ name ] ) )

	# error-check thresholds
	minPath = scores['minPathogenicScore']
	minLikelyPath = scores['minLikelyPathogenicScore']
	maxBenign = scores['maxBenignScore']
	maxLikelyBenign = scores['maxLikelyBenignScore']
	signsAreValid = minPath >= 0 and minLikelyPath >= 0 \
		and maxBenign <= 0 and maxLikelyBenign <= 0
	if not signsAreValid:
		print( "\nCharGer ERROR: Pathogenic thresholds must be positive and benign thresholds must be negative\n" )
		sys.exit(2)
	if minPath < minLikelyPath:
		print( "\nCharGer ERROR: Score threshold for 'pathogenic' status cannot be set lower than 'likely pathogenic' status\n" )
		sys.exit(2)
	if maxBenign > maxLikelyBenign:
		print( "\nCharGer ERROR: Score threshold for 'benign' status cannot be set higher than 'likely benign' status\n" )
		sys.exit(2)

	stamps.append( time.time() )

	# Keyword groups that are handed to more than one charger call
	inputFiles = {
		"maf" : settings["maf"] ,
		"vcf" : settings["vcf"] ,
		"tsv" : settings["tsv"] ,
		"pathogenicVariants" : settings["pathogenicVariantsFile"] ,
		"inheritanceGeneList" : settings["inheritanceGeneList"] ,
		"PP2GeneList" : settings["PP2GeneList"] ,
		"BP1GeneList" : settings["BP1GeneList"] ,
		"expression" : settings["expression"] ,
		"deNovo" : settings["deNovo"] ,
		"assumedDeNovo" : settings["assumedDeNovo"] ,
		"coSegregation" : settings["coSegregation"] ,
		"diseases" : settings["diseases"] ,
		}
	localResources = {
		"perl" : settings["perl"] ,
		"vepScript" : settings["vepScript"] ,
		"vepConfig" : settings["vepConfig"] ,
		#"vepDir" : settings["vepDir"] ,
		"vepCache" : settings["vepCache"] ,
		"vepOutput" : settings["vepOutput"] ,
		"ensemblRelease" : settings["ensemblRelease"] ,
		"vepVersion" : settings["vepVersion"] ,
		"grch" : settings["grch"] ,
		"referenceFasta" : settings["referenceFasta"] ,
		"exacVCF" : settings["exacVCF"] ,
		"macClinVarTSV" : settings["macClinVarTSV"] ,
		"macClinVarVCF" : settings["macClinVarVCF"] ,
		}
	variantFilters = {
		"rareAF" : rareAF ,
		"commonAF" : commonAF ,
		"keepAF" : settings["keepAF"] ,
		"anyFilter" : settings["anyFilter"] ,
		"mutationTypes" : settings["mutationTypes"] ,
		}

	engine = charger.charger()

	checkArgs = dict( inputFiles )
	checkArgs.update( localResources )
	engine.testLocalInputs( output = settings["output"] , **checkArgs )

	loadArgs = dict( inputFiles )
	loadArgs.update( variantFilters )
	[ vepDone , preVEP , exacDone , clinvarDone ] = engine.getInputData(
		specific = settings["specific"] ,
		tcga = settings["tcga"] ,
		gene = settings["geneColumn"] ,
		chromosome = settings["chrColumn"] ,
		strand = settings["strandColumn"] ,
		start = settings["startColumn"] ,
		stop = settings["stopColumn"] ,
		ref = settings["refColumn"] ,
		alt = settings["altColumn"] ,
		sample = settings["sampleColumn"] ,
		codon = settings["codonColumn"] ,
		peptideChange = settings["peptideChangeColumn"] ,
		variantClassification = settings["variantClassificationColumn"] ,
		alleleFrequency = settings["alleleFrequencyColumn"] ,
		includeVCFDetails = settings["includeVCFDetails"] ,
		**loadArgs )

	# Skip a remote lookup the input already covers, unless a local
	# script/database was supplied for it.
	if doVEP and vepDone and not settings["vepScript"]:
		doVEP = False
	if doExAC and exacDone and not settings["exacVCF"]:
		doExAC = False
	if doClinVar and clinvarDone:
		doClinVar = False

	# A local ClinVar file always switches ClinVar processing on
	if settings["macClinVarTSV"] or settings["macClinVarVCF"]:
		doClinVar = True

	stamps.append( time.time() )

	fetchArgs = dict( localResources )
	fetchArgs.update( variantFilters )
	engine.getExternalData(
		clinvar = doClinVar ,
		exac = doExAC ,
		vep = doVEP ,
		prevep = preVEP ,
		summaryBatchSize = settings["clinvarSummaryBatchSize"] ,
		searchBatchSize = settings["clinvarSearchBatchSize"] ,
		allOptions = False ,
		maxPost = settings["vepBatchSize"] ,
		fork = settings["fork"] ,
		vcf = settings["vcf"] ,
		#timeout=(20,20) ,
		**fetchArgs )

	stamps.append( time.time() )

	minimumEvidence = 2

	# ( module name , positional args , keyword args ), run in this order
	modulePlan = [
		( "PVS1" , [] , {} ) ,
		( "PS1" , [] , {} ) ,
		( "PS2" , [] , {} ) ,
		( "PS3" , [] , {} ) ,
		( "PS4" , [] , {} ) ,
		( "PM1" , [ settings["recurrenceThreshold"] ] , { "hotspot3d" : settings["clustersFile"] } ) ,
		( "PM2" , [ rareAF ] , {} ) ,
		( "PM3" , [] , {} ) ,
		( "PM4" , [] , {} ) ,
		( "PM5" , [] , {} ) ,
		( "PM6" , [] , {} ) ,
		( "PP1" , [] , {} ) ,
		( "PP2" , [] , {} ) ,
		( "PP3" , [ minimumEvidence ] , {} ) ,
		( "PP4" , [] , {} ) ,
		( "PP5" , [] , {} ) ,
		( "BA1" , [ commonAF ] , {} ) ,
		( "BS1" , [] , {} ) ,
		( "BS2" , [] , {} ) ,
		( "BS3" , [] , {} ) ,
		( "BS4" , [] , {} ) ,
		( "BP1" , [] , {} ) ,
		( "BP2" , [] , {} ) ,
		( "BP3" , [] , {} ) ,
		( "BP4" , [ minimumEvidence ] , {} ) ,
		( "BP5" , [] , {} ) ,
		( "BP6" , [] , {} ) ,
		( "BP7" , [] , {} ) ,
		( "PSC1" , [] , {} ) ,
		( "PMC1" , [] , {} ) ,
		( "PPC1" , [] , {} ) ,
		( "PPC2" , [] , {} ) ,
		( "BSC1" , [] , {} ) ,
		( "BMC1" , [] , {} ) ,
		]
	for moduleName , positional , keywords in modulePlan:
		getattr( engine , moduleName )( *positional , **keywords )

	print( str( rareAF ) + " < " + str( commonAF ) )
	stamps.append( time.time() )

	engine.classify( system="ACMG" , scoresMap = scores )
	engine.classify( system="CharGer" , override=settings["override"] , scoresMap = scores )

	stamps.append( time.time() )

	#engine.printClassifications( )

	engine.writeSummary( settings["output"] , delim='\t' ,
		html = settings["html"] , annotate = settings["annotate"] ,
		maf = settings["maf"] , vcf = settings["vcf"] , tsv = settings["tsv"] ,
		doURLTest = settings["doURLTest"] )

	#engine.pdfSummary( settings["output"] )

	print( "\nCharGer run Times:" )
	stageLabels = [
		"input parse time (s): " ,
		"get input data time (s): " ,
		"get external data time (s): " ,
		"modules run time (s): " ,
		"classification time (s): " ,
		]
	for index , label in enumerate( stageLabels ):
		print( label + str( stamps[ index + 1 ] - stamps[ index ] ) )
	print( "CharGer full run time (s): " + str( stamps[5] - stamps[0] ) )

# Script entry point: hand everything after the program name to main().
if __name__ == "__main__":
	cliArguments = sys.argv[1:]
	main( cliArguments )
