#!/usr/bin/env python
#
# brozzle-page - command line utility for brozzling a single page, i.e. opening
# it in a browser, running some javascript behaviors, and printing outlinks
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import os
import sys
import logging
import brozzler
import re
import warnings
import requests
import string
import datetime

# Command line interface: the page url, plus options for the browser
# executable, the proxy, warcprox features and log verbosity.
_prog_name = os.path.basename(__file__)
arg_parser = argparse.ArgumentParser(
        prog=_prog_name,
        description="brozzle-page - brozzle a single page",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
arg_parser.add_argument("url", metavar="URL", help="page url")
arg_parser.add_argument(
        "-e", "--executable", dest="chrome_exe", default="chromium-browser",
        help="executable to use to invoke chrome")
arg_parser.add_argument(
        "--proxy", dest="proxy", default=None,
        help="http proxy for this site")
arg_parser.add_argument(
        "--enable-warcprox-features", dest="enable_warcprox_features",
        action="store_true",
        help=(
            "enable special features for this site that assume the "
            "configured proxy is warcprox"))
# -v switches the log level from the default INFO to DEBUG
arg_parser.add_argument(
        "-v", "--verbose", dest="log_level", action="store_const",
        const=logging.DEBUG, default=logging.INFO)
arg_parser.add_argument(
        "--version", action="version",
        version="brozzler {} - {}".format(brozzler.__version__, _prog_name))
args = arg_parser.parse_args(sys.argv[1:])

# Log to stdout at the level selected on the command line.
_log_format = (
        "%(asctime)s %(process)d %(levelname)s %(threadName)s "
        "%(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s")
logging.basicConfig(
        stream=sys.stdout, level=args.log_level, format=_log_format)

# Only let warnings and above through from the urllib3 logger, and ignore
# its "insecure request" / "insecure platform" warning categories.
logging.getLogger("requests.packages.urllib3").setLevel(logging.WARN)
_urllib3_exceptions = requests.packages.urllib3.exceptions
for _ignored_category in (
        _urllib3_exceptions.InsecureRequestWarning,
        _urllib3_exceptions.InsecurePlatformWarning):
    warnings.simplefilter("ignore", category=_ignored_category)

# Build the brozzler objects for this one-off run: a site seeded with the url
# from the command line (id -1), the single page to brozzle, and a worker
# that has no frontier.
site = brozzler.Site(
        id=-1,
        seed=args.url,
        proxy=args.proxy,
        enable_warcprox_features=args.enable_warcprox_features)
page = brozzler.Page(
        url=args.url,
        site_id=site.id)
worker = brozzler.BrozzlerWorker(frontier=None)
# youtube-dl object obtained from the worker for this site; handed to
# worker.brozzle_page() further down
ydl = worker._youtube_dl(site)

def on_screenshot(screenshot_png):
    """
    Write a screenshot to a file under /tmp and log the file name.

    The file name is derived from the url given on the command line: every
    character that is not an ascii letter or digit is replaced with "_", the
    result is cut to at most 200 characters, and the current local time
    (YYYYmmddHHMMSS) plus ".png" is appended.

    Args:
        screenshot_png: screenshot data, presumably PNG-encoded bytes; it is
            written to the file unchanged, in binary mode
    """
    OK_CHARS = string.ascii_letters + string.digits
    # File names are limited to 255 bytes on most filesystems and urls are
    # often longer than that, which made open() fail with "File name too
    # long". The sanitized url is pure ascii, so capping it at 200 characters
    # leaves room for the "-<timestamp>.png" suffix.
    MAX_URL_CHARS = 200

    safe_url = "".join(ch if ch in OK_CHARS else "_" for ch in args.url)
    filename = "/tmp/{}-{:%Y%m%d%H%M%S}.png".format(
            safe_url[:MAX_URL_CHARS], datetime.datetime.now())
    with open(filename, 'wb') as f:
        f.write(screenshot_png)
    logging.info("wrote screenshot to %s", filename)

# Start the browser (through the site's proxy, if one was given), brozzle the
# page and log the outlinks found; the browser is stopped no matter how the
# brozzling ends.
browser = brozzler.Browser(chrome_exe=args.chrome_exe)
browser.start(proxy=site.proxy)
try:
    outlinks = worker.brozzle_page(
            browser, ydl, site, page, on_screenshot=on_screenshot)
    # one outlink per line, sorted, each line indented with a tab
    outlinks_listing = "\n\t".join(sorted(outlinks))
    logging.info("outlinks: \n\t%s", outlinks_listing)
except brozzler.ReachedLimit as e:
    logging.error("reached limit %s", e)
finally:
    browser.stop()
