#!python
import argparse
import logging
import sys
from pathlib import Path

import redblackgraph as rb
from redblackgraph.util.relationship_file_io import RelationshipFileReader, MAX_COLUMNS_EXCEL
from redblackgraph.reference.triangularization import canonical_sort

MAX_PRACTICAL_SIZE = 1500

# Relationship types always handed to the reader; values given with -l/--filter are added to these.
DEFAULT_FILTER = ("BiologicalParent", "UntypedParent", "UnspecifiedParentType")


def build_parser():
    """Create the argument parser for the rbgcf command line.

    Automatic help is disabled (``add_help=False``) because ``-h`` is used as
    the short form of ``--hops``.
    """
    parser = argparse.ArgumentParser(
        description="rbgcf - get cannonical form: parses rbg ingest files and generates the cannonical form",
        add_help=False,
        usage="rbgcf -f <base-file> -o <out-directory> (<base-file>.vertices.csv and <base-file>.edges.csv must exist)",
    )
    parser.add_argument("-f", "--basefile", metavar="<STR>", type=str, help="base file name", required=True)
    parser.add_argument("-o", "--outdir", metavar="<STR>", type=str,
                        help="output directory (default is same directory as basefile")
    parser.add_argument("-v", "--verbose", action="store_true", default=False, help="Increase output verbosity [False]")
    parser.add_argument("-h", "--hops", default=4, type=int, help="Number of hops to include in graph")
    # default=None on purpose: with action="append" a list default would receive
    # each "-l A B" group as a nested list. The defaults are merged in parse_args().
    parser.add_argument("-l", "--filter", action="append", default=None, type=str, nargs="+",
                        help="Relationship types to filter on; may be repeated. Values are added to the defaults "
                             f"[{', '.join(DEFAULT_FILTER)}]")
    parser.add_argument("-r", "--replace", action="store_true", default=False, help="Replace existing output files")
    return parser


def parse_args(argv=None):
    """Parse the command line and return the resulting namespace.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv[1:]``.

    Returns:
        The parsed namespace. ``filter`` is always a flat list of strings: the
        default relationship types followed by any values given with ``-l``.

    Raises:
        SystemExit: With status 2 when the arguments are invalid. The full help
            is printed to stdout and the specific error to stderr.
    """
    parser = build_parser()

    def fail(message):
        # Show the full help first, then the specific problem, and exit with status 2.
        parser.print_help()
        parser.exit(2, f"{parser.prog}: error: {message}\n")

    parser.error = fail
    args = parser.parse_args(argv)

    # Each -l occurrence contributes one list (nargs="+"); flatten them onto the defaults.
    requested = [name for group in (args.filter or []) for name in group]
    args.filter = list(DEFAULT_FILTER) + requested
    return args


def main(argv=None):
    """Read the ingest files, then write the simple and the cannonical xlsx files.

    Exits with status 2 when an input file is missing, or when an output file
    already exists and ``--replace`` was not given.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv[1:]``.
    """
    args = parse_args(argv)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG if args.verbose else logging.INFO)
    logger = logging.getLogger(__name__)

    basefile = args.basefile
    vertices_file = Path(basefile + ".vertices.csv")
    edges_file = Path(basefile + ".edges.csv")
    basename = Path(basefile).name
    hops = args.hops

    for required_file in (vertices_file, edges_file):
        if not required_file.exists():
            sys.stderr.write(f"{required_file} not found\n")
            sys.exit(2)

    outdir = Path(args.outdir) if args.outdir else Path(basefile).parent

    outputfile = outdir / f"{basename}.{hops}.simple.xlsx"
    cannonicalfile = outdir / f"{basename}.{hops}.cannonical.xlsx"
    if not args.replace:
        for target in (outputfile, cannonicalfile):
            if target.exists():
                sys.stderr.write(f"{target} already exists\n")
                sys.exit(2)

    # Make sure the output directory exists before any (potentially slow) processing starts.
    outdir.mkdir(parents=True, exist_ok=True)

    logger.info("Reading graph input files")

    reader = RelationshipFileReader(vertices_file, edges_file, hops, args.filter)
    graph: rb.array = reader()

    vertex_count = len(graph)
    # NOTE(review): an oversized graph is only logged and processing continues;
    # confirm whether the writer can cope or whether this should abort.
    if vertex_count >= MAX_COLUMNS_EXCEL:
        logger.error(f"Trying to ingest a graph that exceeds the size excel can handle. ({vertex_count} vertices)")
    if vertex_count >= MAX_PRACTICAL_SIZE:
        logger.warning(f"This graph is on the large size ({vertex_count}). Processing times for graphs in excess of {MAX_PRACTICAL_SIZE} nodes are noticably slower.")

    logger.info(f"Graph is created. Writing out simple format as xslx file to {outputfile}")

    writer = rb.RedBlackGraphWriter(reader)
    writer(graph, output_file=outputfile)

    logger.info("Simple format created. Computing transitive closure of Graph")

    R_star = graph.transitive_closure().W

    logger.info("Computing cannonical form of Graph")

    R_cannonical = canonical_sort(R_star)

    logger.info(f"Writing out cannonical form to {cannonicalfile}")
    writer(R_cannonical.A, output_file=cannonicalfile, key_permutation=R_cannonical.label_permutation)


if __name__ == '__main__':
    main()



