#!python
'''G5merge
  Merge an entire HDF5-file into another HDF5-file: copy all datasets from <source> to some root
  in <destination>. The root is based on the file-path of <source> (as it is specified, no
  conversion to for example an absolute path is done). According to the following rules the root
  becomes:

  * File-path, without extension (default)
  * File-path as specified (--noconvert)
  * Only the directory name of the file-path (--dirname)
  * File-path with some regex substitution (--find ... --replace ...)
  * Some custom root (--root)

Usage:
  G5merge [options] <source> <destination>

Arguments:
  <source>            Source HDF5-file.
  <destination>       Destination HDF5-file (appended).

Options:
      --noconvert     Include extension of <source> in root.
      --dirname       Use only directory name of <source> in root.
  -f, --find=ARG      Regex search  to apply to <source>.
  -r, --replace=ARG   Regex replace to apply to <source>.
  -p, --root=ARG      Manually set root.
      --rm            Remove <source> after successful copy.
  -d, --dry-run       Dry run.
      --verbose       Verbose operations.
  -h, --help          Show help.
      --version       Show version.

(c - MIT) T.W.J. de Geus | tom@geus.me | www.geus.me | github.com/tdegeus/GooseHDF5
'''

# ==================================================================================================

# temporary fix: suppress warning from h5py
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import sys, os, re, h5py, docopt, GooseHDF5, traceback

# ==================================================================================================

def confirm(message='Proceed [y/n]?\n'):
  r'''
Ask the user a yes/no question, and keep asking until a valid answer is given.

:param message: Prompt that is shown on every attempt.
:return: ``True`` if the user answers 'y' or 'Y', ``False`` if the user answers 'n' or 'N'.
  '''

  # accepted answers, and the value that each of them maps to
  answers = {'y': True, 'Y': True, 'n': False, 'N': False}

  while True:

    reply = input(message)

    # - valid answer: done
    if reply in answers:
      return answers[reply]

    # - anything else (including an empty reply): ask again
    print('Please enter y or n.')

# ==================================================================================================

def error(message):
  r'''
Report a fatal problem and terminate: ``message`` is printed to the standard output, after which
the program exits with status 1.
  '''

  print(message)

  # what "sys.exit(1)" does under the hood
  raise SystemExit(1)

# ==================================================================================================

def quit(message):
  r'''
Ask the user to confirm (using ``message`` as prompt). The function simply returns if the user
agrees, and exits the program with status 1 if the user declines.
  '''

  proceed = confirm(message)

  if proceed:
    return

  sys.exit(1)

# ==================================================================================================

def isfile(fname):
  r'''
Check that a file exists: return silently if ``fname`` is an existing file, quit the program with
an error message otherwise.
  '''

  if os.path.isfile(fname):
    return

  error('"{0:s}" does not exist'.format(fname))

# ==================================================================================================

def h5py_File(fname, mode):
  r'''
Open an HDF5-file. If opening fails, a message and the traceback are printed and the program
exits with status 1.

:param fname: Path of the HDF5-file.
:param mode: Mode that is passed on to ``h5py.File`` (the script uses 'r' and 'a').
:return: The opened ``h5py.File``.
  '''

  try:

    return h5py.File(fname, mode)

  # "Exception" instead of a bare "except": Ctrl-C and "sys.exit" are no longer swallowed
  except Exception:

    print('Failed to open {0:s}'.format(fname))

    traceback.print_exc()

    # quit here: falling through would return "None", on which the caller only fails later and
    # with an unrelated error
    sys.exit(1)

# ==================================================================================================

# parse command-line options
# --------------------------

# the usage is taken from the docstring of this file
args = docopt.docopt(__doc__, version='0.0.2')

# check files
# -----------

# <source> has to exist (the program quits with a message otherwise)
isfile(args['<source>'])

# nothing to merge if both arguments point to the same file (compared as absolute paths)
if os.path.abspath(args['<destination>']) == os.path.abspath(args['<source>']):

  print('<source> and <destination> are the same, skipping "{0:s}"'.format(args['<source>']))

  raise SystemExit(0)

# get paths
# ---------

# N.B. "--replace" is compared to "None" (the value docopt is documented to give to an option that
# was not specified), such that an empty replacement (remove whatever "--find" matches) is allowed

# apply regex to full path
if args['--find'] and args['--replace'] is not None and not args['--root']:

  root = GooseHDF5.abspath(re.sub(args['--find'], args['--replace'], args['<source>']))

# get explicitly specified path
elif not args['--find'] and args['--replace'] is None and args['--root']:

  root = GooseHDF5.abspath(args['--root'])

# default: derive the root from the file-path of <source>
elif not args['--find'] and args['--replace'] is None and not args['--root']:

  # - "--dirname"  : only the directory name of the file-path
  # - (default)    : the file-path without extension
  # - "--noconvert": the file-path as specified; the entire path is used (a misplaced "[0]"
  #                  previously reduced the root to the first character of the path)
  if       args['--dirname'  ]: root = GooseHDF5.abspath(os.path.split   (args['<source>'])[0])
  elif not args['--noconvert']: root = GooseHDF5.abspath(os.path.splitext(args['<source>'])[0])
  else                        : root = GooseHDF5.abspath(                 args['<source>']    )

# catch illegal cases
else:

  error('Conflicting options "--find", "--replace", and "--root"')

# dry run
# -------

if args['--dry-run']:

  # announce the merge that would be performed
  print('Merging {0:s} -> {1:s}:{2:s}'.format(args['<source>'],args['<destination>'],root))

  # a clash with existing datasets is only possible if <destination> already exists
  if os.path.isfile(args['<destination>']):

    # - open both files read-only
    src_file = h5py_File(args['<source>'     ], 'r')
    dst_file = h5py_File(args['<destination>'], 'r')

    # - paths that the datasets of <source> would get in <destination>
    #   (the first character of each source path is dropped before joining it to the root)
    dst_paths = [GooseHDF5.join(root, name[1:]) for name in GooseHDF5.getdatasets(src_file)]

    # - quit with an error if any of these paths is already taken
    clash = GooseHDF5.exists_any(dst_file, dst_paths)

    if clash:
      listing = '\n  '.join(dst_paths)
      error('One of the datasets in "{0:s}" exists:\n  {1:s}'.format(args['<destination>'], listing))

    # - close HDF5-files
    src_file.close()
    dst_file.close()

  # a dry run never modifies anything: stop here
  sys.exit(0)

# verbose
# -------

if args['--verbose']:

  print('Merging {0:s} -> {1:s}:{2:s}'.format(args['<source>'],args['<destination>'],root))

# merge
# -----

# open HDF5-files (<source> read-only, <destination> in append mode)
source = h5py_File(args['<source>'     ], 'r')
dest   = h5py_File(args['<destination>'], 'a')

# get datasets to copy
source_datasets = list(GooseHDF5.getdatasets(source))

# convert to datasets to store
# (joined with "GooseHDF5.join", exactly like the dry run, and not with "os.path.join": the latter
# follows the conventions of the operating system, and uses "\" on Windows)
dest_datasets = [GooseHDF5.join(root,path[1:]) for path in source_datasets]

# merge, keeping track of the outcome: <source> may only be removed after a successful copy
success = True

try:
  GooseHDF5.copydatasets(source, dest, source_datasets, dest_datasets)
except Exception as e:
  print(e)
  success = False

# finish
# ------

# close HDF5-files (also if the copy failed)
source.close()
dest  .close()

# failed copy: keep <source> (also if "--rm" was specified) and signal the failure to the caller
if not success: sys.exit(1)

# remove file
if args['--rm']: os.remove(args['<source>'])
