#!/bin/sh -e

##########################################################################
#   Synopsis:
#       filter [--min-mnc N] [--max-sd N] [--min-fc N] [--max-p-val N] fc-file.txt
#       
#   Description:
#       Filter output of fasda fold-change based on any of the reported
#       statistics.  An example of such output is shown below.
#
#   .nf
#   .na
# Feature                 MNC1    MNC2  SD/C1  SD/C2  FC 1-2 log2(FC) P-val
# YPL071C_mRNA            29.6    31.1    0.3    0.2    1.05    0.07  0.72611
# YLL050C_mRNA           399.5   543.8    0.2    0.2    1.36    0.44  0.02857
# YMR172W_mRNA            60.6    83.3    0.2    0.2    1.38    0.46  0.02167
# YOR185C_mRNA            57.1    56.1    0.3    0.2    0.98   -0.03  0.92562
# YLL032C_mRNA            33.9    21.6    0.3    0.3    0.64   -0.65  0.14138
# YBR225W_mRNA            61.4    75.4    0.2    0.2    1.23    0.30  0.11281
#   .ad
#   .fi
#       
#   Arguments:
#       --min-mnc N     Minimum average of MNC1 and MNC2 (default = 1)
#       --max-sd N      Maxium average of SD/C1 and SD/C2 (default = 1.0)
#       --min-fc N      Minimum of FC or 1/FC (default = 1.0)
#       --max-p-val     Maximum P-value (default = 0.05)
#       
#   Returns:
#       0 on success, non-zero error codes otherwise
#
#   Examples:
#       fasda filter --min-fc 2 Results/11-fasda-kallisto/fc-3-replicates.txt
#
#   See also:
#       fasda-fold-change(1)
#       
#   History:
#   Date        Name        Modification
#   2025-03-31  Jason Bacon Begin
##########################################################################

usage()
{
    cat << EOM

Usage: fasda filter [--min-mnc N] [--max-sd N] [--min-fc N] [--max-p-val N] fc-file.txt

* At least one filtering flag above must be specified.

* Average of MNC1 and MNC2 must be >= min-mnc       Default = 1
* Average of SD/C1 and SD/C2 must be <= max-sd      Default = 1.0
* FC or 1/FC must be >= min-fc                      Default = 1.0
* P-val must be <= max-p-val                        Default = 0.05

Example data:

Feature                 MNC1    MNC2  SD/C1  SD/C2  FC 1-2 log2(FC) P-val
YPL071C_mRNA            29.6    31.1    0.3    0.2    1.05    0.07  0.72611
YLL050C_mRNA           399.5   543.8    0.2    0.2    1.36    0.44  0.02857
YMR172W_mRNA            60.6    83.3    0.2    0.2    1.38    0.46  0.02167
YOR185C_mRNA            57.1    56.1    0.3    0.2    0.98   -0.03  0.92562
YLL032C_mRNA            33.9    21.6    0.3    0.3    0.64   -0.65  0.14138
YBR225W_mRNA            61.4    75.4    0.2    0.2    1.23    0.30  0.11281

Example command:

$0 --min-mnc 20 --max-sd 0.3 --min-fc 2.0 --max-p-val 0.05 Results/11-fasda-kallisto/fc-3-replicates.txt

EOM
    exit 64     # sysexits(3) EX_USAGE
}


##########################################################################
#   Main
##########################################################################

if [ $# -lt 3 ]; then
    usage
fi

min_mnc=1
max_sd=1.0
min_fc=1.0
max_p_val=0.05

while [ $(echo $1 | cut -c 1,2) = '--' ]; do
    case $1 in
    --min-mnc)
	min_mnc=$2
	;;
    
    --max-sd)
	max_sd=$2
	;;
    
    --min-fc)
	min_fc=$2
	;;
    
    --max-p-val)
	max_p_val=$2
	;;
    
    *)
	usage
	;;
    esac
    shift
    shift
done
if [ $# != 1 ]; then
    usage
fi
file=$1

set -x
awk -v min_mnc=$min_mnc -v max_sd=$max_sd -v min_fc=$min_fc -v max_p_val=$max_p_val \
    '(($2 + $3) / 2.0 >= min_mnc) && (($4 + $5) / 2.0 <= max_sd) && (($6 >= min_fc) || ($6 != 0 && 1.0/$6 >= min_fc)) && ($8 <= max_p_val) { print $0 }' \
    $file
