#!/usr/bin/python
'''`cat` for avro files'''

from avro.io import DatumReader
from avro.datafile import DataFileReader

import json
import csv
from sys import stdout, stdin
import re
from itertools import ifilter
from functools import partial

def show_json(row):
    '''Print `row` to standard output as one line of JSON.'''
    encoded = json.dumps(row)
    print(encoded)

# Row-writing function of a CSV writer bound to standard output
_write_row = csv.writer(stdout).writerow
def show_csv(row):
    '''Write the values of `row` to standard output as one CSV line.'''
    # Sorting the keys keeps every field in the same column from row to row
    ordered_keys = sorted(row)
    _write_row([row[name] for name in ordered_keys])

def record_match(expr, record):
    '''Evaluate the expression `expr` with `record` available as `r`.

    Returns whatever the expression evaluates to.

    NOTE(review): `expr` is handed straight to eval(), so it can execute
    arbitrary code; only pass expressions that come from a trusted source.
    '''
    scope = {"r": record}
    return eval(expr, None, scope)

def print_avro(avro, args):
    '''Print records from the iterator `avro` as selected by `args`.

    Options read from `args`:
      filter -- expression handed to record_match; a false value disables
                filtering
      skip   -- number of (filtered) records to drop before printing
      count  -- maximum number of records to print
      format -- "csv" prints CSV rows, anything else prints JSON
      header -- when true, print a CSV header row made of the sorted keys
                of the first record reached after skipping

    Raises ValueError if `args.header` is set and the format is not "csv".
    '''
    if args.header and (args.format != "csv"):
        raise ValueError("--header applies only to CSV format")

    # Apply filter first, so skip and count refer to matching records only
    if args.filter:
        avro = ifilter(partial(record_match, args.filter), avro)

    for _ in xrange(args.skip):
        try:
            next(avro)
        except StopIteration:
            # Fewer records than requested to skip: nothing left to print
            return

    show = show_csv if args.format == "csv" else show_json
    header_pending = args.header
    remaining = args.count
    for record in avro:
        if header_pending:
            _write_row(sorted(record.keys()))
            header_pending = False
        if remaining > 0:
            show(record)
            remaining -= 1
        # Stop as soon as the quota is used up, so no record beyond `count`
        # is fetched from the input just to be thrown away
        if remaining <= 0:
            break

def print_schema(avro):
    '''Pretty-print the schema stored in the metadata of `avro`.

    `avro` must expose a `meta` mapping whose "avro.schema" entry holds the
    schema as JSON text; it is re-serialized with a 4-space indent.
    '''
    schema = avro.meta["avro.schema"]
    # Pretty print; the call form of print matches show_json and parses on
    # both Python 2 and Python 3
    print(json.dumps(json.loads(schema), indent=4))

def open_file(filename):
    '''Return a readable file object for `filename`.

    The name "-" selects standard input, which is returned as-is. Any other
    name is opened in binary mode, because Avro container files are binary
    data and must not go through newline translation or text decoding.

    Raises IOError/OSError if the file cannot be opened.
    '''
    if filename == "-":
        return stdin

    return open(filename, "rb")

def main(argv=None):
    '''Command-line entry point: print the records or schema of an Avro file.

    `argv` is the full argument vector including the program name; when it is
    None or empty, sys.argv is used.

    Raises SystemExit with an error message if the input file cannot be
    opened (argparse also exits on invalid arguments).
    '''
    import sys
    from argparse import ArgumentParser

    argv = argv or sys.argv

    parser = ArgumentParser(description="`cat` for Avro files", prog="avrocat")
    parser.add_argument("filename", help="avro file (- for stdin)")
    # The default is not a string, so argparse leaves it as float infinity,
    # meaning "no limit"
    parser.add_argument("-n", "--count", default=float("Infinity"),
                    help="number of records to print", type=int)
    parser.add_argument("-s", "--skip", help="number of records to skip", type=int,
                   default=0)
    parser.add_argument("-f", "--format", help="record format", default="json",
                  choices=["json", "csv"])
    parser.add_argument("--header", help="print CSV header", default=False,
                   action="store_true")
    parser.add_argument("--filter", help="filter records (e.g. r['age']>1)",
                    default=None)
    parser.add_argument("--schema", help="print schema", action="store_true",
                       default=False)

    args = parser.parse_args(argv[1:])

    try:
        fo = open_file(args.filename)
    except (OSError, IOError) as e:
        raise SystemExit("error: can't open %s - %s" % (args.filename, e))

    try:
        avro = DataFileReader(fo, DatumReader())

        if args.schema:
            print_schema(avro)
        else:
            print_avro(avro, args)
    finally:
        # Release the file even if reading or printing fails; standard input
        # is left open because this function did not open it
        if args.filename != "-":
            fo.close()

# Run the command-line tool only when executed as a script, not on import
if __name__ == "__main__":
    main()

