#! /usr/bin/env python3

"""
Usage:
    topsim-cli <query> [options] [<file>]

    topsim-cli --help


Options:
    -I                     Case-sensitive matching.
    -k <k>                 Maximum number of search results. [default: 1]
    --tie                  Include all the results with the same similarity of the "k"-th result. May return more than "k" results.

    -s <simfunc>           Use "jaccard", "overlap", or "tversky" as similarity function. [default: jaccard]
    -e <e>                 Parameter for "tversky" similarity. [default: 0.001]

    --mapping=<mapping>    Map each string to a set of either "gram"s or "word"s. [default: gram]
    --numgrams=<numgrams>  Number of characters for each gram when mapping by "gram". [default: 2]

    --quiet                Do not print additional information to standard error.
"""

import sys
from functools import partial
import os
import time
import resource
import gc

from docopt import docopt

from topsim import TopSim

# Parse the command line against the usage text in the module docstring.
argv = docopt(__doc__)

# Diagnostics go to standard error, or are discarded when --quiet is given.
if argv["--quiet"]:
    diagnosticStream = open(os.devnull, 'w')
else:
    diagnosticStream = sys.stderr

# print2 behaves like print() but writes to the diagnostic stream.
print2 = partial(print, file=diagnosticStream)

startTime = time.clock()
def printResourceUsage():
    global startTime

    gc.collect()

    print2("{:.2} sec | {:.2} MB".format(
        time.clock() - startTime,
        resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
    ))

    startTime = time.clock()


# Load the candidate strings, one per line, from <file> when it is given and
# from standard input otherwise. Trailing line terminators are stripped.
if argv["<file>"]:
    # The context manager closes the input file as soon as it has been read.
    with open(argv["<file>"]) as inputFile:
        sRawStrs = [line.rstrip('\r\n') for line in inputFile]
else:
    sRawStrs = [line.rstrip('\r\n') for line in sys.stdin]

# The progress messages end with a space rather than a newline, so they are
# flushed explicitly; otherwise a line-buffered stream would only show them
# once the timing line is printed, i.e. after the work has finished.
print2("Indexing...", end=" ", flush=True)

# Build the index over the loaded strings; the -I flag is passed positionally.
ts = TopSim(
    sRawStrs,
    argv["-I"],
    mapping=argv["--mapping"],
    numGrams=int(argv["--numgrams"])
)

printResourceUsage()

print2("Searching...", end=" ", flush=True)

# Run the query with the numeric options converted from their string form.
rBest = ts.search(
    argv["<query>"],
    int(argv["-k"]),
    argv["--tie"],
    argv["-s"],
    float(argv["-e"])
)

printResourceUsage()

# Blank line separating the diagnostics from the results.
print2()

# Each result pairs a similarity with the indices (into sRawStrs) of the
# strings having that similarity; print one "string<TAB>similarity" line per
# matching string on standard output.
for sim, lns in rBest:
    for ln in lns:
        print("{}\t{:.4}".format(sRawStrs[ln], sim))
