#!python
# baleen
# Admin script for executing Baleen commands.
#
# Author:   Benjamin Bengfort <benjamin@bengfort.com>
# Created:  Fri Sep 19 10:56:44 2014 -0400
#
# Copyright (C) 2014 Bengfort.com
# For license information, see LICENSE.txt
#
# ID: baleen [] benjamin@bengfort.com $

"""
Admin script for executing Baleen commands.
"""

##########################################################################
## Imports
##########################################################################

import os
import sys
import json
import time
import baleen
import argparse
import schedule
import baleen.opml
import baleen.feed
import baleen.models as db
import baleen.export

from functools import partial
from baleen.config import settings
from baleen.utils.logger import IngestLogger

##########################################################################
## Module Constants
##########################################################################

# Keyword arguments that main() unpacks into argparse.ArgumentParser.
PROG = dict(
    description="Administrative utility for Baleen ingestor.",
    epilog="Not intended for production use.",
    version=baleen.get_version(),
)


##########################################################################
## Command Functions
##########################################################################

def ingest_feeds(args):
    """
    Ingests the RSS feeds to MongoDB

    Connects to the database, then runs a single MongoFeedIngestor pass,
    forwarding ``args.verbose`` as the ingestor's ``verbose`` flag.
    Returns an empty string because there is no summary message to print.
    """
    db.connect()
    baleen.feed.MongoFeedIngestor().ingest(verbose=args.verbose)
    return ""


def ingest_opml(args):
    """
    Imports the OPML file to MongoDB

    Every path in ``args.opml`` is handed to ``baleen.opml.ingest`` and the
    values it returns are added up. Returns a one line message reporting
    that total together with the number of files processed.
    """
    paths = args.opml
    total = sum(baleen.opml.ingest(path) for path in paths)
    return "Ingested %i feeds from %i OPML files" % (total, len(paths))


def summary(args):
    """
    Print info about Baleen from the database

    Builds a short text report containing, in order: the configuration
    (only when ``args.config`` is set), the feed and post counts, and the
    title, feed and creation time of the post with the highest id.

    :param args: parsed command line arguments; only ``args.config`` is read.
    :returns: the report, joined with newlines and encoded as UTF-8.
    """
    # Setup output and connect to database.
    output = []
    db.connect()

    # Printout configuration details as necessary.
    if args.config:
        output.extend([u"Configuration:", unicode(settings), u""])

    output.append(u"Baleen Status:")
    output.append(
        u"{} Feeds and {} Posts".format(
            db.Feed.objects.count(), db.Post.objects.count()
        )
    )

    # NOTE(review): the queryset looks like MongoEngine's, where first()
    # returns None for an empty result. Guard against that so a freshly
    # created database produces a report instead of an AttributeError.
    latest = db.Post.objects.order_by('-id').first()
    if latest is None:
        output.extend([
            u"",
            u"No posts have been ingested yet.",
        ])
    else:
        output.extend([
            u"",
            u"Latest Post: ",
            u"    Title: \"{}\"".format(latest.title),
            u"    Feed: \"{}\"".format(latest.feed.title),
            u"    Fetched: {}".format(
                latest.created.strftime("%Y-%m-%d %H:%M:%S")
            ),
        ])

    return u"\n".join(output).encode('utf-8', errors='replace')


def export(args):
    """
    Exports an HTML corpus at the specified location.

    :param args: parsed command line arguments. ``args.location`` is a
        one-element list holding the output directory and
        ``args.categories`` is passed through to the exporter.
    :returns: a message with the number of sub-directories found under the
        output directory and the total number of entries inside them.
    """

    # Connect to database
    db.connect()

    # Export from the database
    root = args.location[0]
    exporter = baleen.export.MongoExporter()
    exporter.export(root, categories=args.categories)

    # Perform counts of export. The directories are collected into a real
    # list because they are both iterated and measured with len(); a lazy
    # filter() object (Python 3) supports neither a second pass nor len().
    candidates = (os.path.join(root, name) for name in os.listdir(root))
    cats = [path for path in candidates if os.path.isdir(path)]
    docs = sum(len(os.listdir(cat)) for cat in cats)

    return (
        "Exported {} documents in {} categories "
        "as well as a readme to {}.".format(
            docs, len(cats), root
        )
    )


def run(args):
    """
    Runs the ingest command every hour.

    Registers ``ingest_feeds(args)`` as an hourly job with ``schedule`` and
    then checks for pending jobs once per second. The loop ends when a
    KeyboardInterrupt or SystemExit is raised; any other exception
    propagates to the caller. Returns an empty string.
    """
    log = IngestLogger()
    log.info("Starting baleen ingestion service every hour.")

    job = partial(ingest_feeds, args)
    schedule.every().hour.do(job)

    try:
        while True:
            schedule.run_pending()
            time.sleep(1)
    except (KeyboardInterrupt, SystemExit):
        # Treated as a normal shutdown request.
        pass

    log.info("Stopping baleen ingestion service.")
    return ""

##########################################################################
## Console Parsers
##########################################################################

def csv(type=int):
    """
    Argparse type for comma separated values. Also parses the type, e.g. int.

    :param type: callable applied to every item after surrounding
        whitespace has been stripped (defaults to ``int``).
    :returns: a function suitable for argparse's ``type=`` keyword that
        turns a string such as ``"1, 2,3"`` into the list ``[1, 2, 3]``.

    The returned function raises ``argparse.ArgumentTypeError`` when any
    item cannot be converted.
    """
    def parser(s):
        try:
            # Convert eagerly into a list: a lazy map() would postpone the
            # conversion (and its errors) until after this try block.
            return [type(part.strip()) for part in s.split(",")]
        except Exception:
            # Not every callable has __name__ (e.g. functools.partial).
            name = getattr(type, "__name__", repr(type))
            raise argparse.ArgumentTypeError(
                "Could not parse CSV value to type {}: {!r}".format(name, s)
            )

    return parser

##########################################################################
## Main Method and functionality
##########################################################################

def main(*argv):
    """
    Build the command line parser, run the selected command and exit.

    :param argv: command line tokens without the program name. When none
        are passed, argparse falls back to ``sys.argv[1:]``.

    This function does not return: it always ends in ``parser.exit`` (on
    success) or ``parser.error`` (on failure), both of which raise
    SystemExit.
    """
    # argparse's ``version`` constructor keyword is deprecated in Python 2.7
    # and rejected by Python 3, so take it out of the constructor options
    # and register an explicit version action instead.
    options = dict(PROG)
    version = options.pop("version", None)

    parser = argparse.ArgumentParser(**options)
    if version is not None:
        # -v/--version are the flags the deprecated keyword used to add.
        parser.add_argument('-v', '--version', action='version', version=version)
    subparsers = parser.add_subparsers(title='commands', description='Baleen utilities')

    # Ingest Command
    ingest_parser = subparsers.add_parser('ingest', help='Ingests the RSS feeds to MongoDB')
    ingest_parser.add_argument('-v', '--verbose', action="store_true", default=False, help='Print details.')
    ingest_parser.set_defaults(func=ingest_feeds)

    # Import Command
    import_parser = subparsers.add_parser('import', help='Import a list of feeds from an OPML file')
    import_parser.add_argument('opml', nargs="+", type=str, help='OPML file(s) to import to database.')
    import_parser.set_defaults(func=ingest_opml)

    # Export Command
    export_parser = subparsers.add_parser('export', help='Export the raw HTML corpus for doing NLP')
    export_parser.add_argument('-C', '--categories', type=csv(str), default=None, help='Specify categories to export')
    export_parser.add_argument('location', nargs=1, type=str, help='Location to write the corpus out to.')
    export_parser.set_defaults(func=export)

    # Run Command (same args as ingest)
    run_parser = subparsers.add_parser('run', help='Runs the ingest command every hour')
    run_parser.add_argument('-v', '--verbose', action="store_true", default=False, help='Print details.')
    run_parser.set_defaults(func=run)

    # Info Command
    summary_parser = subparsers.add_parser('info', help='Print info about Baleen from the database')
    summary_parser.add_argument('-c', '--config', action='store_true', default=False, help='Print configuration.')
    summary_parser.set_defaults(func=summary)

    # Handle input from the command line; honour explicitly passed tokens.
    args = parser.parse_args(list(argv) if argv else None)

    # Where sub-commands are optional (Python 3) no ``func`` default is set
    # when the command is omitted; report that as a usage error.
    func = getattr(args, 'func', None)
    if func is None:
        parser.error("no command specified")

    try:
        msg = "%s\n" % str(func(args))         # Call the default function
    except Exception as e:
        parser.error(str(e))                   # Exit with error
    else:
        # NOTE: argparse writes the exit message to stderr, not stdout.
        parser.exit(0, msg)                    # Exit cleanly with message

# Script entry point: pass the command line tokens (without the program
# name) to main() as positional arguments.
if __name__ == '__main__':
    main(*sys.argv[1:])
