#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse
import json
import logging
import os
import sqlite3

from networkx.readwrite import json_graph

from pareidoscope.utils import conllu
from pareidoscope.utils import cwb
from pareidoscope.utils import database
from pareidoscope.utils import nx_graph

# Configure the root logger once at import time: INFO and above, each record
# prefixed with its level name and a timestamp.
logging.basicConfig(
    level=logging.INFO,
    format="%(levelname)s %(asctime)s: %(message)s",
)


def arguments():
    """Define the command-line interface and parse the arguments.

    Recognized arguments:

    * ``--db`` (required): path of the SQLite3 database; converted to an
      absolute path with ``os.path.abspath``.
    * ``--no-id`` (flag): the corpus has no unique sentence IDs.
    * ``-f`` / ``--format`` (required): ``conllu`` or ``cwb``.
    * ``CORPUS`` (positional): the input corpus; opened by
      ``argparse.FileType`` for reading with UTF-8 encoding.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``, with the
        attributes ``db``, ``no_id``, ``format`` and ``CORPUS``.
    """
    cli = argparse.ArgumentParser(
        description="Convert a corpus in CoNLL-U or CWB-treebank format into a corresponding SQLite database"
    )
    cli.add_argument(
        "--db",
        type=os.path.abspath,
        required=True,
        help="SQLite3 database for results",
    )
    cli.add_argument(
        "--no-id",
        action="store_true",
        help="Corpus has no unique sentence IDs, create them on the fly",
    )
    cli.add_argument(
        "-f",
        "--format",
        choices=["conllu", "cwb"],
        required=True,
        help="Input format of the corpus: CoNLL-U or CWB-treebank",
    )
    cli.add_argument(
        "CORPUS",
        type=argparse.FileType("r", encoding="utf-8"),
        help="The input corpus",
    )
    return cli.parse_args()


def main():
    """Convert the input corpus into an SQLite database of sentence graphs.

    Parses the command line, creates the database given by ``--db``, reads
    the sentences of CORPUS with the reader selected by ``--format`` and
    builds a directed graph for each sentence.  Every graph accepted by
    ``nx_graph.is_sensible_graph`` is inserted into the database together
    with its node-link JSON serialization; all other sentences are skipped.

    With ``--no-id`` the IDs delivered by the reader are replaced by
    ``s-1``, ``s-2``, ... in reading order.  Skipped sentences still consume
    a number.

    The inserts are committed once, after the whole corpus has been
    processed without error.  The database connection and the corpus file
    are closed in every case, including when an exception aborts processing.
    """
    # Function-scope import: only needed to recognise a corpus read from stdin.
    import sys

    # Sentence reader and graph builder for each supported input format.
    readers = {
        "cwb": (cwb.sentences_iter, nx_graph.create_nx_digraph_from_cwb),
        "conllu": (conllu.sentences_iter, nx_graph.create_nx_digraph_from_conllu),
    }

    args = arguments()
    try:
        # `c` is handed to database.insert_sentence unchanged; presumably a
        # cursor on `conn` -- verify against database.create_db.
        conn, c = database.create_db(args.db)
        try:
            sentences_iter, create_digraph = readers[args.format]
            sents = sentences_iter(args.CORPUS, return_id=True)
            for number, (sentence, origid) in enumerate(sents, start=1):
                if args.no_id:
                    # Numbering follows the position in the input, not the
                    # number of sentences actually stored.
                    origid = "s-%d" % number
                gs = create_digraph(sentence, origid)
                if not nx_graph.is_sensible_graph(gs):
                    continue
                graph = json.dumps(json_graph.node_link_data(gs), ensure_ascii=False, sort_keys=True)
                database.insert_sentence(c, origid, gs, graph)
            # Commit only after a complete, error-free run (as before).
            conn.commit()
        finally:
            conn.close()
    finally:
        # argparse.FileType maps "-" to sys.stdin; leave that stream open.
        if args.CORPUS is not sys.stdin:
            args.CORPUS.close()


# Run the conversion only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
