#!/usr/bin/env python
# Copyright (c) 2025, Alexei Znamensky (@russoz)
# GNU General Public License v3.0+ (see LICENSE or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This script uses only the standard Python library.
#
# However, to run the sanity checks for multiple versions of ansible-core,
# it relies on `andebox`, which is listed in the `requirements-test.txt` file
# and is already installed in the devcontainer.

import html
import re
import subprocess
import sys
import urllib.request
from html.parser import HTMLParser
from pathlib import Path


class TitleParser(HTMLParser):
    """Extract the text of the first ``<title>`` element of an HTML document.

    The document is parsed while the object is constructed; the result is
    then available through the ``title`` property.
    """

    def __init__(self, body: str) -> None:
        """Parse *body* (a complete HTML document as text) immediately."""
        super().__init__()
        self.in_title = False
        self.title_parts = []
        # Becomes True when the first <title> element is closed, so that
        # later <title> elements (e.g. inside inline SVG) are not appended
        # to the page title.
        self._title_done = False
        self.feed(body)
        # Flush whatever the parser is still buffering at end of input.
        self.close()

    def handle_starttag(self, tag, attrs):
        """Start collecting text when the first ``<title>`` opens."""
        if tag.lower() == "title" and not self._title_done:
            self.in_title = True

    def handle_endtag(self, tag):
        """Stop collecting text when the title being collected closes."""
        if tag.lower() == "title" and self.in_title:
            self.in_title = False
            self._title_done = True

    def handle_data(self, data):
        """Accumulate text chunks that appear inside the title element."""
        if self.in_title:
            self.title_parts.append(data)

    @property
    def title(self) -> str:
        """Return the collected title text, stripped; empty if none was found."""
        # NOTE(review): with HTMLParser's default convert_charrefs=True the
        # data passed to handle_data() is presumably already decoded, which
        # would make this a second unescape pass - confirm before removing.
        return html.unescape("".join(self.title_parts).strip())


# Maps a ShellCheck code (e.g. "SC2086") to the description text produced by
# retrieve_sc_description(), so each wiki page is requested at most once per run.
sc_desc_cache = {}


def retrieve_sc_description(sc_code: str) -> str:
    """Return a one-line description for a ShellCheck code.

    The description is taken from the ``<title>`` of the code's page on the
    ShellCheck wiki, followed by the page URL. Results, including failure
    placeholders, are stored in ``sc_desc_cache``.

    Args:
        sc_code: ShellCheck code used to build the wiki URL.

    Returns:
        ``"<title> - <url>"`` on success, ``"(no title found at <url>)"`` when
        the page has no usable title, or ``"(failed to fetch: <error>)"`` when
        the page could not be retrieved. This function does not raise for
        fetch errors.
    """
    try:
        return sc_desc_cache[sc_code]
    except KeyError:
        pass

    url = f"https://www.shellcheck.net/wiki/{sc_code}"

    try:
        with urllib.request.urlopen(url, timeout=10) as resp:
            # respect charset if provided
            charset = resp.headers.get_content_charset() or "utf-8"
            body = resp.read().decode(charset, errors="replace")
    except Exception as e:
        # Deliberately broad: the description is only a trailing comment, so
        # any fetch/decode problem becomes a placeholder instead of aborting.
        desc = f"(failed to fetch: {e})"
    else:
        title = TitleParser(body).title.replace(f"ShellCheck: {sc_code} – ", "").rstrip(".")
        # Decide on the fallback BEFORE appending the URL; once the URL is
        # appended the string can never be empty.
        desc = f"{title} - {url}" if title else f"(no title found at {url})"

    sc_desc_cache[sc_code] = desc
    return desc


def _parse_shellcheck_error(line: str):
    """Return ``(path, sc_code)`` for a shellcheck ERROR line, else ``None``."""
    if not line.startswith("ERROR"):
        return None
    # The fields are read positionally: index 1 is the file path and index 4
    # is the ShellCheck code, so at least five colon-separated fields are needed.
    parts = line.split(":", 5)
    if len(parts) < 5 or "/" not in parts[1]:
        return None
    return parts[1].strip(), parts[4].strip()


def regen_lines(tox_target: str, ignore_version: str) -> None:
    """Regenerate the shellcheck entries of one sanity ignore file.

    Removes every line containing "shellcheck" from
    ``tests/sanity/ignore-<ignore_version>.txt``, runs the shellcheck sanity
    test through ``andebox tox-test``, and appends one ignore entry per
    reported (path, code) pair, each annotated with a description.

    Args:
        tox_target: tox environment passed to ``andebox tox-test -e``.
        ignore_version: version string used in the ignore file name.

    Errors reading or writing the ignore file, or launching ``andebox``, are
    reported on stderr and the function returns without raising.
    """
    ignore_file = Path("tests/sanity") / f"ignore-{ignore_version}.txt"

    print(f"BEGIN: {ignore_file} ({tox_target}/{ignore_version})")
    print("  remove shellcheck lines", end="", flush=True)

    # Remove shellcheck lines
    try:
        with ignore_file.open("r", encoding="utf-8") as f:
            original = f.readlines()
        kept = [ll for ll in original if "shellcheck" not in ll]
        with ignore_file.open("w", encoding="utf-8") as f:
            f.writelines(kept)
    except OSError as e:
        print(f"ERROR handling {ignore_file}: {e}", file=sys.stderr)
        return

    print("\n  run shellcheck", end="", flush=True)

    try:
        proc = subprocess.run(
            [
                "andebox",
                "tox-test",
                "-e",
                tox_target,
                "--",
                "sanity",
                "--python",
                "default",
                "--docker",
                "default",
                "--test",
                "shellcheck",
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
    except OSError as e:
        # The tool could not even be started (e.g. not installed): put the
        # removed entries back instead of leaving the file stripped.
        print(f"\nERROR running andebox: {e}", file=sys.stderr)
        try:
            with ignore_file.open("w", encoding="utf-8") as f:
                f.writelines(original)
        except OSError as restore_err:
            print(f"ERROR restoring {ignore_file}: {restore_err}", file=sys.stderr)
        return

    print("\n  generate lines: ", end="", flush=True)

    # A set collapses repeated reports of the same code in the same file.
    found = set()
    for line in proc.stdout.splitlines():
        parsed = _parse_shellcheck_error(line)
        if parsed is None:
            continue

        path, sc_code = parsed
        desc = retrieve_sc_description(sc_code)

        out_line = f"{path} shellcheck:{sc_code}   # {desc}"
        found.add(out_line)
        print(".", end="", flush=True)

    if found:
        try:
            with ignore_file.open("a", encoding="utf-8") as f:
                f.writelines(f"{ll}\n" for ll in sorted(found))

        except OSError as e:
            print(f"ERROR appending to {ignore_file}: {e}", file=sys.stderr)

    print("\n")


def main() -> None:
    """Regenerate shellcheck ignore entries for every sanity ignore file.

    Must be run from the project top directory (where ``tests/sanity``
    exists). Each ``ignore-<X.Y>.txt`` file is paired with the tox target
    ``ac<XY>``, except the file with the highest version, which is paired
    with the ``dev`` target.

    Exits with status 1 when ``tests/sanity`` is missing or holds no ignore
    files.

    Raises:
        ValueError: if a file matching ``ignore-*.txt`` does not carry a
            recognizable ``<major>.<minor>`` version.
    """
    if not (sanity_dir := Path("tests") / "sanity").is_dir():
        print(
            "ERROR: You must run this script from the top directory of the project",
            file=sys.stderr,
        )
        sys.exit(1)

    # Matched against the bare file name so the path separator is irrelevant.
    ig_version_re = re.compile(r"ignore-(?P<version>\d+\.\d+)\.txt")

    def get_version(filename):
        if not (match := ig_version_re.fullmatch(Path(filename).name)):
            raise ValueError(f"ignore filename not recognized: {filename}")
        version = match.group("version")
        return f"ac{version.replace('.', '')}", version

    def version_key(filename):
        # Numeric ordering, so that e.g. 2.10 sorts after 2.9.
        return tuple(int(part) for part in get_version(filename)[1].split("."))

    ignore_files = sorted(sanity_dir.glob("ignore-*.txt"), key=version_key)
    if not ignore_files:
        print(f"ERROR: no ignore-*.txt files found in {sanity_dir}", file=sys.stderr)
        sys.exit(1)

    *released, newest = ignore_files
    runs = [get_version(ignore_file) for ignore_file in released]
    # The newest ignore file is exercised with the "dev" tox target.
    runs.append(("dev", get_version(newest)[1]))

    for tox, ver in runs:
        regen_lines(tox, ver)


if __name__ == "__main__":
    main()
