#!/usr/bin/env python

from __future__ import print_function

from collections import OrderedDict
import argparse
import os
import atexit
from subprocess import call

import pandas as pd

from epic.version import __version__

parser = argparse.ArgumentParser(
    description="""Count reads in windows to create count matrix. (This matrix does not contain
    information about enrichment and is not usable with downstream tools such
    as epic-cluster. Use the -sm option in epic to create such a matrix.)

(Visit github.com/endrebak/epic for examples and help.)

    """,
    prog=os.path.basename(__file__))

parser.add_argument(
    '--infiles',
    '-i',
    required=True,
    type=str,
    nargs='+',
    help='''ChIP files to count reads in (bed/bedpe format).''')


parser.add_argument(
    '--number-cores',
    '-cpu',
    required=False,
    default=1,
    type=int,
    help=
    '''Number of cpus to use. Can use at most one per chromosome. Default: 1.''')


parser.add_argument(
    '--keep-duplicates',
    '-k',
    required=False,
    default=False,
    action='store_true',
    help=
    '''Keep reads mapping to the same position on the same strand within a library. Default is to remove all but the first duplicate.''')


parser.add_argument(
    '--window-size',
    '-w',
    required=False,
    default=200,
    type=int,
    help=
    '''Size of the windows to scan the genome. WINDOW_SIZE is the smallest possible island. Default 200.''')

parser.add_argument(
    '--fragment-size',
    '-fs',
    required=False,
    default=150,
    type=int,
    help=
    '''(Single end reads only) Size of the sequenced fragment. The center of the the fragment will be taken as half the fragment size. Default 150.''')


parser.add_argument('--genome',
                    '-gn',
                    required=False,
                    default="hg19",
                    type=str,
                    help='''Which genome to analyze. Default: hg19. If --chromsizes flag is given, --genome is not required.''')

parser.add_argument(
    '--chromsizes',
    '-cs',
    required=False,
    type=str,
    help=
    '''Set the chromosome lengths yourself in a file with two columns: chromosome names and sizes. Useful to analyze custom genomes, assemblies or simulated data. Only chromosomes included in the file will be analyzed.''')

parser.add_argument(
    '--outfile',
    '-o',
    required=True,
    type=str,
    help=
    '''File to write gzipped count matrix to.''')


if __name__ == "__main__":

    args = parser.parse_args()

    from sys import argv

    from epic.run.run_epic import multiple_files_count_reads_in_windows, _merge_files
    from epic.config.genomes import (create_genome_size_dict,
                                     create_genome_size_dict_custom_genome)
    from epic.matrixes.matrixes import put_dfs_in_chromosome_dict

    if not args.chromsizes:
        args.chromosome_sizes = create_genome_size_dict(args.genome)
    else:
        args.chromosome_sizes = create_genome_size_dict_custom_genome(args.chromsizes)

    windows = multiple_files_count_reads_in_windows(args.infiles, args)
    windows_merged = _merge_files(windows.values(), args.number_cores)

    d = put_dfs_in_chromosome_dict(windows_merged)

    df = pd.concat(d.values()).fillna(0)
    df = df.set_index("Chromosome Bin".split())
    df.to_csv(args.outfile, sep=" ", compression="gzip")
