#!python
'''G5select
  Select datasets (or groups of datasets) from a HDF5-file and store to a new HDF5-file.

JSON:
  The input can be a JSON file that looks like:

    {
      "/new/path" : "/old/path",
      ...
    }

Usage:
  G5select [options] [--path ARG]... <source> <destination>

Arguments:
  <source>          Source HDF5-file.
  <destination>     Destination HDF5-file (appended).

Options:
  -p, --path=ARG    Pair of paths: "/destination/path;/source/path".
      --sep=ARG     Set path separator. [default: ;]
  -j, --json=ARG    JSON file that contains the path changes.
  -i, --ignore      Ignore paths that are not present in <source>.
  -f, --force       Force continuation, continue also if this operation discards fields.
  -d, --dry-run     Dry run.
      --verbose     Verbose operations.
  -h, --help        Show help.
      --version     Show version.

(c - MIT) T.W.J. de Geus | tom@geus.me | www.geus.me | github.com/tdegeus/GooseHDF5
'''

# ==================================================================================================

# temporary fix: suppress warning from h5py
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import sys, os, re, h5py, docopt, GooseHDF5, json

# ==================================================================================================

def confirm(message='Proceed [y/n]?\n'):
  r'''
Ask the user a yes/no question and return the answer as a boolean.

The prompt "message" is repeated until the response is exactly one of:

* 'y' or 'Y' -> returns True
* 'n' or 'N' -> returns False

Any other response (including an empty one) prints a hint and asks again.
  '''

  # accepted responses, and the value each of them maps to
  answers = {'y': True, 'Y': True, 'n': False, 'N': False}

  while True:

    reply = input(message)

    if reply in answers:
      return answers[reply]

    # - not understood: give a hint and prompt again
    print('Please enter y or n.')

# ==================================================================================================

def error(message):
  r'''
Report a fatal problem and terminate the program.

The message is written using "print", after which the program exits with status 1.
  '''

  print(message)

  # same effect as "sys.exit(1)", which raises exactly this exception
  raise SystemExit(1)

# ==================================================================================================

def quit(message):
  r'''
Ask the user (using "confirm") whether to continue. A positive response simply returns,
a negative response exits the program with status 1.
  '''

  proceed = confirm(message)

  if proceed:
    return

  sys.exit(1)

# ==================================================================================================

def isfile(fname):
  r'''
Check that the file "fname" exists. If it does not, the problem is reported using "error"
(which ends the program).
  '''

  if os.path.isfile(fname):
    return

  error('"{0:s}" does not exist'.format(fname))

# ==================================================================================================

# parse command-line options
# --------------------------

args = docopt.docopt(__doc__,version='0.0.2')

# check files
# -----------

# files that are required to exist
isfile(args['<source>'])
if args['--json']:
  isfile(args['--json'])

# check file existence of the destination
# NOTE(review): the usage text describes <destination> as "appended", but below it is opened in
# mode 'w' (which truncates an existing file) - confirm which of the two is intended
if os.path.isfile(args['<destination>']) and not args['--force']:
  quit('File "{0:s}" already exists, continue [y/n]? '.format(args['<destination>']))

# get paths
# ---------

# read/allocate the mapping {"/new/path": "/old/path", ...}
if args['--json']:
  # - context manager: the file is closed as soon as the JSON has been parsed
  with open(args['--json'], 'r') as fh:
    paths = json.load(fh)
  # - everything below relies on a mapping: report other JSON content as a usage error
  if not isinstance(paths, dict):
    error('"{0:s}" should contain a JSON object that maps new paths to old paths'.format(args['--json']))
else:
  paths = {}

# add paths from format: "/destination/path;/source/path"
for path in args['--path']:
  parts = path.split(args['--sep'])
  # - without a separator there is no source path: report it (instead of a bare IndexError)
  if len(parts) < 2:
    error('Cannot interpret "{0:s}", expected "/destination/path{1:s}/source/path"'.format(
      path, args['--sep']
    ))
  paths[parts[0]] = parts[1]

# check/convert paths, copy
# -------------------------

# open HDF5-file
# (context manager: the file is closed on every way out, including "sys.exit" and errors)
with h5py.File(args['<source>'], 'r') as source:

  # check that all source paths exist
  # - list paths that are not present in the source
  missing = [(new, old) for new, old in paths.items() if old not in source]
  # - quit on the first missing path, unless missing paths are to be ignored
  if missing and not args['--ignore']:
    error('"{0:s}" not found in "{1:s}"'.format(missing[0][1], args['<source>']))
  # - remove paths
  for new, _ in missing:
    del paths[new]

  # expand datasets
  # - allocate
  pairs = []
  # - fill
  for new, old in paths.items():
    # -- get all paths from "old" downwards
    for src in GooseHDF5.getdatasets(source, old):
      # -- rename root: replace the first occurrence of "old" by "new"; this is a literal
      # -- replacement, such that characters that are special in regular expressions
      # -- (e.g. "+", "(", "[", "\") cannot corrupt the result
      pairs.append((src, src.replace(old, new, 1)))
  # - rename & sort (by source path)
  paths = sorted(pairs, key=lambda pair: pair[0])

  # check discarded paths
  if not args['--force']:
    # - get difference: datasets in the source that are not selected
    diff = sorted(set(GooseHDF5.getdatasets(source)) - {i for i,_ in paths})
    # - prompt user (a dry run only lists these paths, see below)
    if diff and not args['--dry-run']:
      quit('The following paths are not copied:\n  '+'\n  '.join(diff)+'\nContinue [y/n]? ')
  else:
    diff = []

  # verbose
  # -------

  if args['--verbose'] or args['--dry-run']:

    # column width: longest source path ("default" avoids a ValueError if nothing is selected)
    width = max((len(i) for i,_ in paths), default=0)

    print('The following fields are copied from "{0:s}" to "{1:s}"'.format(
      args['<source>'], args['<destination>']
    ))
    print('')

    for i,j in paths:
      print('\033[92m  ' + i.ljust(width) + ' -> ' + j.ljust(width) + '\033[0m')

    print('')

  if args['--dry-run'] and len(diff) > 0:

    print('\033[93mThe following fields are NOT copied from "{0:s}"\033[0m'.format(args['<source>']))
    print('')
    for i in diff:
      print('\033[91m  {0:s}\033[0m'.format(i))
    print('')

  if args['--dry-run']:

    sys.exit(0)

  # copy
  # ----

  # open HDF5-file (closed automatically, also if copying fails)
  with h5py.File(args['<destination>'], 'w') as dest:

    # copy datasets
    GooseHDF5.copydatasets(source, dest, [i for i,_ in paths], [j for _,j in paths])
