#! /usr/bin/env python3
#
#
# Advene: Annotate Digital Videos, Exchange on the NEt
# Copyright (C) 2008-2017 Olivier Aubert <contact@olivieraubert.net>
#
# This file is part of Advene.
#
# Advene is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Advene is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Advene; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#

import logging
logger = logging.getLogger(__name__)

"""Data migration script.

It is intended to migrate data to a new template. The assumption is
that the template package (first parameter) has an old_id_mapping
metadata item: a JSON-encoded dict that has original type ids as keys
and new type ids as values.

The template data structure is first merged into the source package.
Then old annotation types are checked: if they have a mapping defined
in old_id_mapping, then move their annotations to the new type, and
delete the original type.

Then save the resulting package with the original name plus
"_migrated" suffix.

"""
import json
import os
import re
import sys

try:
    import advene.core.config as config
except ImportError:
    # Try to set path
    (maindir, subdir) = os.path.split(os.path.dirname(os.path.abspath(sys.argv[0])))
    if subdir == 'scripts':
        # Chances are that we were in a development tree...
        libpath = os.path.join(maindir, "lib")
        sys.path.insert(0, libpath)
        import advene.core.config as config
        config.data.fix_paths(maindir)

from advene.model.annotation import Annotation
from advene.model.package import Package
from advene.core.idgenerator import Generator
from advene.util.merger import Differ

def alpha(s):
    """Return ``s`` reduced to its ASCII letters, lowercased.

    Digits, whitespace and punctuation are dropped, so that ids and
    titles can be compared loosely.
    """
    return re.sub('[^a-zA-Z]', '', s.strip()).lower()


def build_mappings(raw_mapping):
    """Build the lookup tables from the old_id_mapping metadata.

    ``raw_mapping`` is the JSON text of a dict with original type ids
    as keys and new type ids as values, or a false value (None, empty
    string) when the template does not define the metadata.

    Return a ``(old_id_mapping, transformed_mapping)`` tuple. The first
    item is the decoded dict. The second one has the same values,
    indexed by the alphabetic-only version of the keys (see alpha()).
    Both are empty dicts when there is no metadata, so lookups are
    always safe.
    """
    old_id_mapping = json.loads(raw_mapping) if raw_mapping else {}
    transformed_mapping = {}
    for old_id, new_id in old_id_mapping.items():
        key = alpha(old_id)
        # A key without any letter normalizes to "". Keeping it would
        # make every title that normalizes to "" (e.g. the stripped
        # version of a single-word title) match it by accident.
        if key:
            transformed_mapping[key] = new_id
    return old_id_mapping, transformed_mapping


def matching_type(at, old_id_mapping, transformed_mapping):
    """Return the id of the new annotation type matching at, None if there is none.

    Three lookups are tried in turn: the exact id of ``at``, its
    normalized title, then its normalized title without its last
    whitespace-separated word.
    """
    if at.id in old_id_mapping:
        # Simple case: we can match ids
        return old_id_mapping[at.id]
    title = at.title or ""
    # Check on the title
    t = alpha(title)
    if t in transformed_mapping:
        return transformed_mapping[t]
    # Last try: there could be suffixes, so try to remove it.
    t = alpha("".join(title.split()[:-1]))
    if t in transformed_mapping:
        return transformed_mapping[t]
    logger.debug("Cannot find mapping for %s (%s)", at.title, t)
    return None


def migrate_package(template, source, old_id_mapping, transformed_mapping):
    """Migrate the package stored in ``source`` and save the result.

    The template structure is merged into the package, then the
    annotations of every old type that has a matching new type are
    recreated in the new type and the old type is removed from its
    schema. The result is saved next to ``source``, with a "_migrated"
    suffix inserted before the file extension.

    Return the name of the saved file.
    """
    p = Package(source)
    p._idgenerator = Generator(p)

    # List of old annotation types with annotation. It must be built
    # before the merge, which adds the template types to the package.
    old_types = [at for at in p.annotationTypes if at.annotations]

    # Merge template structure into source package
    differ = Differ(template, p)
    for _name, s, d, action in differ.diff_structure():
        action(s, d)

    total_annotations = 0
    total_types = 0
    # Unconverted types are collected in a new list: removing items from
    # old_types while iterating over it would skip the next type.
    remaining_types = []
    # Migrate data to new types when they can be determined
    for at in old_types:
        new_id = matching_type(at, old_id_mapping, transformed_mapping)
        if new_id is None:
            remaining_types.append(at)
            continue
        if new_id == at.id:
            # The type maps onto itself: migrating would recreate its
            # annotations and then remove the type they belong to.
            logger.debug("Type %s (%s) is already the target type, leaving it untouched",
                         at.id, at.title)
            continue
        # NOTE(review): presumably get_element_by_id returns None for an
        # unknown id - confirm against the Package API.
        new_at = p.get_element_by_id(new_id)
        if new_at is None:
            logger.warning("Target type %s for %s (%s) is not in the merged package, keeping the old type",
                           new_id, at.id, at.title)
            remaining_types.append(at)
            continue

        # Work on a snapshot, since annotations are removed from the
        # package inside the loop.
        annotations = list(at.annotations)
        # Migrate old annotations
        logger.info("Migrating %d annotations from %s (%s) to %s",
                    len(annotations), at.id, at.title, new_at.id)
        total_types += 1
        total_annotations += len(annotations)
        for a in annotations:
            new = p.createAnnotation(type=new_at,
                                     ident=p._idgenerator.get_id(Annotation),
                                     fragment=a.fragment.clone())
            new.author = a.author
            new.content.data = a.content.data
            new.setDate(a.date)
            p.annotations.append(new)
            p.annotations.remove(a)
        # There should be no more annotations in the type. Remove it.
        at.schema.annotationTypes.remove(at)

    # Save package
    (path, ext) = os.path.splitext(source)
    new_name = "".join((path, '_migrated', ext))
    logger.info("Converted %d annotations from %d types. There remain %d annotations in %d unconverted old types",
                total_annotations, total_types,
                sum(len(at.annotations) for at in remaining_types),
                len(remaining_types))
    logger.info("Saving migrated package to %s", new_name)
    p.save(new_name)
    return new_name


def main(argv):
    """Migrate every source package given on the command line.

    ``argv`` follows the sys.argv layout: program name, template
    package, then one or more packages to migrate.

    Return the process exit status: 0 on success, 1 on a usage error.
    """
    SYNTAX = "%s template.azp old_file.azp [old_file2.azp...]" % argv[0]
    if len(argv) < 2:
        logger.error(SYNTAX)
        return 1
    template_file = argv[1]
    source_files = argv[2:]
    if not source_files:
        logger.error("No source file is specified")
        logger.error(SYNTAX)
        return 1

    template = Package(template_file)
    # The mapping only depends on the template: decode it once.
    old_id_mapping, transformed_mapping = build_mappings(
        template.getMetaData(config.data.namespace, "old_id_mapping"))
    if not old_id_mapping:
        logger.warning("No old_id_mapping metadata in %s: no annotation type can be migrated",
                       template_file)

    for source in source_files:
        migrate_package(template, source, old_id_mapping, transformed_mapping)
    return 0


if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    sys.exit(main(sys.argv))
