package tests_test

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strconv"
	"strings"
	"time"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	snapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v6/apis/volumesnapshot/v1"
	promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"

	corev1 "k8s.io/api/core/v1"
	storagev1 "k8s.io/api/storage/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"

	crclient "sigs.k8s.io/controller-runtime/pkg/client"

	cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1"
	"kubevirt.io/containerized-data-importer/pkg/common"
	"kubevirt.io/containerized-data-importer/pkg/controller"
	cc "kubevirt.io/containerized-data-importer/pkg/controller/common"
	"kubevirt.io/containerized-data-importer/pkg/storagecapabilities"
	"kubevirt.io/containerized-data-importer/tests/framework"
	"kubevirt.io/containerized-data-importer/tests/utils"
)

// Polling parameters used by the metric assertions in this file:
//   - metricPollingInterval is the delay between two polls of a metric.
//   - metricPollingTimeout is the upper bound given to Eventually when waiting for a metric value.
//   - metricConsistentPollingTimeout is the window given to Consistently when a metric must not change.
const (
	metricPollingInterval          = 5 * time.Second
	metricPollingTimeout           = 5 * time.Minute
	metricConsistentPollingTimeout = 2 * time.Minute
)

var _ = Describe("[Destructive] Monitoring Tests", Serial, func() {
	f := framework.NewFramework("monitoring-test")

	// State shared between the setup/cleanup nodes and the specs of this container:
	//   - cr, cdiPods: the CDI CR and CDI pods captured in BeforeEach (getCDI / getCDIPods).
	//   - crModified: set by a spec that removes or replaces the CDI CR; AfterEach then
	//     removes and re-ensures the captured CR.
	//   - defaultStorageClass, defaultVirtStorageClass: results of utils.GetDefaultStorageClass /
	//     utils.GetDefaultVirtStorageClass; helpers treat nil as "nothing to update".
	//   - defaultCloneStrategy: Status.CloneStrategy of the default storage class profile,
	//     recorded by waitForCloneStrategyInit and awaited again in AfterEach.
	//   - numAddedStorageClasses: how many "unknown-sc-<i>" storage classes a spec created;
	//     deleteUnknownStorageClasses iterates over that many names.
	//   - originalCompleteVal, originalIncompleteVal: counts of kubevirt_cdi_storageprofile_info
	//     series with complete="true" / complete="false", captured in BeforeEach.
	var (
		cr                      *cdiv1.CDI
		crModified              bool
		cdiPods                 *corev1.PodList
		defaultStorageClass     *storagev1.StorageClass
		defaultVirtStorageClass *storagev1.StorageClass
		defaultCloneStrategy    *cdiv1.CDICloneStrategy
		numAddedStorageClasses  int
		originalCompleteVal     int
		originalIncompleteVal   int
	)

	waitForStorageProfileMetricInit := func() {
		waitForMetricInit(f, "kubevirt_cdi_storageprofile_info", framework.CdiPodPrefix)

		Eventually(func() bool {
			scs, err := f.K8sClient.StorageV1().StorageClasses().List(context.TODO(), metav1.ListOptions{})
			Expect(err).ToNot(HaveOccurred())
			scCount := len(scs.Items)
			sps, err := f.CdiClient.CdiV1beta1().StorageProfiles().List(context.TODO(), metav1.ListOptions{})
			Expect(err).ToNot(HaveOccurred())
			spCount := len(sps.Items)

			complete := countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "complete", "true")
			incomplete := countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "complete", "false")

			return scCount == spCount && scCount == complete+incomplete
		}, 2*time.Minute, 1*time.Second).Should(BeTrue())
	}

	// updateDefaultStorageClasses sets the default-storage-class annotation
	// (cc.AnnDefaultStorageClass) on the captured default storage class and the
	// default-virt annotation (cc.AnnDefaultVirtStorageClass) on the captured default virt
	// storage class to boolStr. A nil captured class is skipped.
	updateDefaultStorageClasses := func(boolStr string) {
		By(fmt.Sprintf("Update default storage class annotation to %q", boolStr))

		// setAnnotation re-reads the storage class before updating it, so the write is
		// based on the object currently stored in the cluster.
		setAnnotation := func(scName, annotation string) {
			sc, err := f.K8sClient.StorageV1().StorageClasses().Get(context.TODO(), scName, metav1.GetOptions{})
			Expect(err).ToNot(HaveOccurred())
			// Writing into a nil map panics; the annotations may have been removed since
			// the class was captured.
			if sc.Annotations == nil {
				sc.Annotations = make(map[string]string)
			}
			sc.Annotations[annotation] = boolStr
			_, err = f.K8sClient.StorageV1().StorageClasses().Update(context.TODO(), sc, metav1.UpdateOptions{})
			Expect(err).ToNot(HaveOccurred())
		}

		if defaultStorageClass != nil {
			setAnnotation(defaultStorageClass.Name, cc.AnnDefaultStorageClass)
		}
		if defaultVirtStorageClass != nil {
			setAnnotation(defaultVirtStorageClass.Name, cc.AnnDefaultVirtStorageClass)
		}
	}

	updateDefaultStorageClassProfileClaimPropertySets := func(accessMode *corev1.PersistentVolumeAccessMode) {
		profile, err := f.CdiClient.CdiV1beta1().StorageProfiles().Get(context.TODO(), defaultStorageClass.Name, metav1.GetOptions{})
		Expect(err).ToNot(HaveOccurred())

		profile.Spec.ClaimPropertySets = nil
		if accessMode != nil {
			profile.Spec.ClaimPropertySets = []cdiv1.ClaimPropertySet{{
				AccessModes: []corev1.PersistentVolumeAccessMode{*accessMode},
				VolumeMode:  &cc.BlockMode,
			}}
		}
		_, err = f.CdiClient.CdiV1beta1().StorageProfiles().Update(context.TODO(), profile, metav1.UpdateOptions{})
		Expect(err).ToNot(HaveOccurred())
	}

	// deleteUnknownStorageClasses deletes the storage classes "unknown-sc-0" ..
	// "unknown-sc-<numAddedStorageClasses-1>". Classes that no longer exist are ignored so
	// the helper can be called both from a spec and again from AfterEach.
	deleteUnknownStorageClasses := func() {
		By("Delete unknown storage classes")
		for i := 0; i < numAddedStorageClasses; i++ {
			name := fmt.Sprintf("unknown-sc-%d", i)
			// Delete directly instead of Get-then-Delete: the extra Get was redundant and
			// left a window in which the class could disappear and fail the Delete.
			err := f.K8sClient.StorageV1().StorageClasses().Delete(context.TODO(), name, metav1.DeleteOptions{})
			if errors.IsNotFound(err) {
				continue
			}
			Expect(err).ToNot(HaveOccurred())
		}
	}

	// createStubSnapshotClass creates the stub VolumeSnapshotClass built by
	// newStubSnapshotClass for the given driver and fails the spec on any error.
	createStubSnapshotClass := func(driver string) {
		stub := newStubSnapshotClass(driver)
		createErr := f.CrClient.Create(context.TODO(), stub)
		Expect(createErr).ToNot(HaveOccurred())
	}

	// deleteStubSnapshotClass removes the stub VolumeSnapshotClass; a missing object is not
	// an error, any other failure fails the spec.
	deleteStubSnapshotClass := func() {
		stub := newStubSnapshotClass("")
		err := f.CrClient.Delete(context.TODO(), stub)
		if errors.IsNotFound(err) {
			return
		}
		Expect(err).ToNot(HaveOccurred())
	}

	// waitForCloneStrategyInit polls the default storage class profile until its
	// Status.CloneStrategy is non-nil. Every poll stores the observed value in
	// defaultCloneStrategy.
	waitForCloneStrategyInit := func() {
		readStrategy := func() *cdiv1.CDICloneStrategy {
			sp, err := f.CdiClient.CdiV1beta1().StorageProfiles().Get(context.TODO(), defaultStorageClass.Name, metav1.GetOptions{})
			Expect(err).ToNot(HaveOccurred())
			defaultCloneStrategy = sp.Status.CloneStrategy
			return defaultCloneStrategy
		}
		Eventually(readStrategy, 2*time.Minute, 5*time.Second).ShouldNot(BeNil())
	}

	// waitForCloneStrategy polls the default storage class profile until its
	// Status.CloneStrategy is set and equal to strategy.
	waitForCloneStrategy := func(strategy cdiv1.CDICloneStrategy) {
		strategyMatches := func() bool {
			sp, err := f.CdiClient.CdiV1beta1().StorageProfiles().Get(context.TODO(), defaultStorageClass.Name, metav1.GetOptions{})
			Expect(err).ToNot(HaveOccurred())
			if current := sp.Status.CloneStrategy; current != nil {
				return *current == strategy
			}
			return false
		}
		Eventually(strategyMatches, 2*time.Minute, 5*time.Second).Should(BeTrue())
	}

	BeforeEach(func() {
		if !f.IsPrometheusAvailable() {
			Skip("This test depends on prometheus infra being available")
		}

		cr = getCDI(f)
		cdiPods = getCDIPods(f)
		defaultStorageClass = utils.GetDefaultStorageClass(f.K8sClient)
		defaultVirtStorageClass = utils.GetDefaultVirtStorageClass(f.K8sClient)

		waitForStorageProfileMetricInit()
		originalCompleteVal = countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "complete", "true")
		originalIncompleteVal = countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "complete", "false")
		By(fmt.Sprintf("Original complete:%d incomplete:%d", originalCompleteVal, originalIncompleteVal))
		waitForCloneStrategyInit()
	})

	// Restores everything a spec may have changed: the CDI CR, the extra "unknown-sc-<i>"
	// storage classes, the default storage class annotations, the default profile's claim
	// property sets, the stub snapshot class and the clone strategy. It then waits for the
	// complete/incomplete profile counts to return to the values captured in BeforeEach.
	AfterEach(func() {
		// Cleanup nodes run even when BeforeEach skipped the spec. Without Prometheus none
		// of the state below was captured and every metric wait would fail, so do nothing.
		if !f.IsPrometheusAvailable() {
			return
		}

		if crModified {
			removeCDI(f, cr)
			ensureCDI(f, cr, cdiPods)
			crModified = false
		}

		deleteUnknownStorageClasses()
		updateDefaultStorageClasses("true")
		waitForStorageProfileMetricInit()
		// The profile helper dereferences defaultStorageClass; it is nil when BeforeEach
		// aborted before (or without) finding a default storage class.
		if defaultStorageClass != nil {
			updateDefaultStorageClassProfileClaimPropertySets(nil)
		}
		deleteStubSnapshotClass()
		// defaultCloneStrategy is only recorded by waitForCloneStrategyInit, the last step
		// of BeforeEach; guard against a setup that failed earlier.
		if defaultStorageClass != nil && defaultCloneStrategy != nil {
			waitForCloneStrategy(*defaultCloneStrategy)
		}

		Eventually(func() int {
			return countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "complete", "true")
		}, 5*time.Minute, 5*time.Second).Should(BeNumerically("==", originalCompleteVal))
		Eventually(func() int {
			return countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "complete", "false")
		}, 5*time.Minute, 5*time.Second).Should(BeNumerically("==", originalIncompleteVal))
	})

	Context("[rfe_id:7101][crit:medium][vendor:cnv-qe@redhat.com][level:component] Metrics and Alert tests", func() {

		// Replaces the CDI CR with a copy whose infra node selector is {"wrong": "wrong"},
		// expects kubevirt_cdi_cr_ready to drop to 0 and the CDINotReady alert to appear,
		// then restores the original spec and expects the metric to return to 1.
		It("[test_id:9656] Metric kubevirt_cdi_cr_ready is 0 when CDI is not ready", func() {
			// Precondition: CDI reports ready before anything is changed.
			Eventually(func() int {
				return getMetricValue(f, "kubevirt_cdi_cr_ready")
			}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", 1))

			// Set the flag before removing the CR so AfterEach recreates it if this spec
			// fails anywhere below.
			crModified = true
			removeCDI(f, cr)

			By("Creating new CDI with wrong NodeSelector")
			cdi := &cdiv1.CDI{
				ObjectMeta: metav1.ObjectMeta{
					Name: cr.Name,
				},
				Spec: cr.Spec,
			}
			// Assigning a fresh map replaces the field on the copied spec only.
			cdi.Spec.Infra.NodePlacement.NodeSelector = map[string]string{"wrong": "wrong"}
			_, err := f.CdiClient.CdiV1beta1().CDIs().Create(context.TODO(), cdi, metav1.CreateOptions{})
			Expect(err).ToNot(HaveOccurred())

			By("Wait for kubevirt_cdi_cr_ready == 0")
			Eventually(func() int {
				return getMetricValue(f, "kubevirt_cdi_cr_ready")
			}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", 0))

			waitForPrometheusAlert(f, "CDINotReady")

			By("Revert CDI CR changes")
			// Re-read the CR so the update carries the current object, then put the
			// captured spec back.
			cdi, err = f.CdiClient.CdiV1beta1().CDIs().Get(context.TODO(), cr.Name, metav1.GetOptions{})
			Expect(err).ToNot(HaveOccurred())
			cdi.Spec = cr.Spec
			_, err = f.CdiClient.CdiV1beta1().CDIs().Update(context.TODO(), cdi, metav1.UpdateOptions{})
			Expect(err).ToNot(HaveOccurred())
			waitCDI(f, cr, cdiPods)
			// The CR is back to its original spec; AfterEach no longer needs to recreate it.
			crModified = false

			By("Wait for kubevirt_cdi_cr_ready == 1")
			Eventually(func() int {
				return getMetricValue(f, "kubevirt_cdi_cr_ready")
			}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", 1))
		})

		// With an untouched CDI installation the kubevirt_cdi_cr_ready metric must reach 1.
		It("[test_id:7963] CDI ready metric value as expected when ready to use", func() {
			readyMetric := func() int {
				return getMetricValue(f, "kubevirt_cdi_cr_ready")
			}
			Eventually(readyMetric, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", 1))
		})

		// Creates two storage classes with a non-existent provisioner, expects the count of
		// incomplete storage profiles to grow by two and the CDIStorageProfilesIncomplete alert
		// to appear, then completes the profiles one by one and expects the count to drop
		// by one after each update.
		It("[test_id:7965] StorageProfile incomplete metric expected value when creating an incomplete profile", func() {
			// NOTE(review): this local shadows the container-level defaultStorageClass that
			// BeforeEach already populated with the same call.
			defaultStorageClass := utils.GetDefaultStorageClass(f.K8sClient)
			defaultStorageClassProfile := &cdiv1.StorageProfile{}
			err := f.CrClient.Get(context.TODO(), types.NamespacedName{Name: defaultStorageClass.Name}, defaultStorageClassProfile)
			Expect(err).ToNot(HaveOccurred())

			// Recorded before creating anything so AfterEach knows which names to delete.
			numAddedStorageClasses = 2
			for i := 0; i < numAddedStorageClasses; i++ {
				_, err = f.K8sClient.StorageV1().StorageClasses().Create(context.TODO(), createUnknownStorageClass(fmt.Sprintf("unknown-sc-%d", i), "kubernetes.io/non-existent-provisioner"), metav1.CreateOptions{})
				Expect(err).ToNot(HaveOccurred())
			}

			expectedIncomplete := originalIncompleteVal + numAddedStorageClasses
			Eventually(func() int {
				return countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "complete", "false")
			}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", expectedIncomplete))

			waitForPrometheusAlert(f, "CDIStorageProfilesIncomplete")

			By("Fix profiles to be complete and test metric value equals original")
			for i := 0; i < numAddedStorageClasses; i++ {
				profile := &cdiv1.StorageProfile{}
				err = f.CrClient.Get(context.TODO(), types.NamespacedName{Name: fmt.Sprintf("unknown-sc-%d", i)}, profile)
				Expect(err).ToNot(HaveOccurred())
				// These might be wrong values, but at least the profile is no longer incomplete
				profile.Spec.ClaimPropertySets = defaultStorageClassProfile.Status.ClaimPropertySets
				err = f.CrClient.Update(context.TODO(), profile)
				Expect(err).ToNot(HaveOccurred())
				// One more profile was completed; the metric is expected to follow.
				expectedIncomplete--
				Eventually(func() int {
					return countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "complete", "false")
				}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", expectedIncomplete))
			}
		})

		It("[test_id:10512]CDIMultipleDefaultVirtStorageClasses fired when more than one default virt storage class exists", func() {
			waitForNoPrometheusAlert(f, "CDIMultipleDefaultVirtStorageClasses")

			numAddedStorageClasses = 2
			for i := 0; i < numAddedStorageClasses; i++ {
				sc := createUnknownStorageClass(fmt.Sprintf("unknown-sc-%d", i), "kubernetes.io/non-existent-provisioner")
				cc.AddAnnotation(sc, cc.AnnDefaultVirtStorageClass, "true")
				_, err := f.K8sClient.StorageV1().StorageClasses().Create(context.TODO(), sc, metav1.CreateOptions{})
				Expect(err).ToNot(HaveOccurred())
			}

			waitForPrometheusAlert(f, "CDIMultipleDefaultVirtStorageClasses")

			deleteUnknownStorageClasses()

			waitForNoPrometheusAlert(f, "CDIMultipleDefaultVirtStorageClasses")
		})

		// Removes the default (and default virt) storage class annotations while a DataVolume
		// with a storage spec exists, expects kubevirt_cdi_datavolume_pending to become 1 and
		// the CDINoDefaultStorageClass alert to appear, then restores the annotations and
		// expects the metrics and the alert to return to their initial state.
		// withAccessModes=false clears the DataVolume's storage access modes before creation.
		DescribeTable("CDINoDefaultStorageClass fired when no default storage class exists, and a DataVolume is waiting for one", func(withAccessModes bool) {
			By("Ensure initial metric values")
			defaultSCs := countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "default", "true")
			Expect(defaultSCs).To(Equal(1))
			defaultVSCs := countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "virtdefault", "true")
			Expect(defaultVSCs).To(BeNumerically("<=", 1))
			waitingDVs := getMetricValue(f, "kubevirt_cdi_datavolume_pending")
			Expect(waitingDVs).To(BeZero())

			updateDefaultStorageClasses("false")

			dv := utils.NewDataVolumeWithHTTPImportAndStorageSpec("test-dv", "1Gi", fmt.Sprintf(utils.TinyCoreQcow2URL, f.CdiInstallNs))
			if !withAccessModes {
				dv.Spec.Storage.AccessModes = nil
			}
			_, err := utils.CreateDataVolumeFromDefinition(f.CdiClient, f.Namespace.Name, dv)
			Expect(err).ToNot(HaveOccurred())

			By("Ensure metric values after removing default storage class annotation")
			Eventually(func() int {
				return countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "default", "true")
			}, metricPollingTimeout, metricPollingInterval).Should(BeZero())
			Eventually(func() int {
				return countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "virtdefault", "true")
			}, metricPollingTimeout, metricPollingInterval).Should(BeZero())
			Eventually(func() int {
				return getMetricValue(f, "kubevirt_cdi_datavolume_pending")
			}, metricPollingTimeout, metricPollingInterval).Should(Equal(1))

			waitForPrometheusAlert(f, "CDINoDefaultStorageClass")

			updateDefaultStorageClasses("true")

			By("Ensure metric values after restoring default storage class annotation")
			// The counts are compared with the values read at the start of this spec.
			Eventually(func() int {
				return countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "default", "true")
			}, metricPollingTimeout, metricPollingInterval).Should(Equal(defaultSCs))
			Eventually(func() int {
				return countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "virtdefault", "true")
			}, metricPollingTimeout, metricPollingInterval).Should(Equal(defaultVSCs))
			Eventually(func() int {
				return getMetricValue(f, "kubevirt_cdi_datavolume_pending")
			}, metricPollingTimeout, metricPollingInterval).Should(BeZero())

			waitForNoPrometheusAlert(f, "CDINoDefaultStorageClass")
		},
			Entry("[test_id:10719]with AccessModes, so Pending PVC is created", true),
			// NOTE(review): the test_id below is still the XXXXX placeholder.
			Entry("[test_id:XXXXX]without AccessModes, so PVC is not created until default storage class exists", false),
		)

		// Walks the CDIDefaultStorageClassDegraded alert through its transitions. The default
		// storage class is first brought into a non-degraded state (a stub snapshot class
		// and/or a stub ReadWriteMany claim property set are added when needed). The alert is
		// then expected to appear when the profile is switched to ReadWriteOnce, disappear
		// when no default storage class is annotated, reappear when the annotation is
		// restored, and disappear again once the profile is restored.
		It("[test_id:10720]CDIDefaultStorageClassDegraded fired when there is a default storage class, but it has no smart clone or ReadWriteMany", func() {
			rwx := corev1.ReadWriteMany
			rwo := corev1.ReadWriteOnce
			isStubSnapshotClass := false

			profile, err := f.CdiClient.CdiV1beta1().StorageProfiles().Get(context.TODO(), defaultStorageClass.Name, metav1.GetOptions{})
			Expect(err).ToNot(HaveOccurred())

			if f.SnapshotSCName != defaultStorageClass.Name && f.CsiCloneSCName != defaultStorageClass.Name {
				waitForPrometheusAlert(f, "CDIDefaultStorageClassDegraded")

				By("Default storage class does not support snapshot or CSI clone - adding stub VolumeSnapshot crds and VolumeSnapshotClass")
				// Fail with a readable assertion rather than a nil-pointer panic when the
				// profile status carries no provisioner.
				Expect(profile.Status.Provisioner).ToNot(BeNil())
				createStubSnapshotClass(*profile.Status.Provisioner)
				waitForCloneStrategy(cdiv1.CloneStrategySnapshot)

				isStubSnapshotClass = true
			}

			if !isStubSnapshotClass && !f.IsSnapshotStorageClassAvailable() && !f.IsCSIVolumeCloneStorageClassAvailable() {
				Skip("Smart Clone is not applicable")
			}

			// Named differently from the hasRWX function so the function is not shadowed
			// for the rest of this spec.
			profileHasRWX := hasRWX(profile)
			if !profileHasRWX {
				waitForPrometheusAlert(f, "CDIDefaultStorageClassDegraded")
				By("Default storage class profile has no ReadWriteMany access mode - adding stub ReadWriteMany")
				updateDefaultStorageClassProfileClaimPropertySets(&rwx)
			}

			waitForNoPrometheusAlert(f, "CDIDefaultStorageClassDegraded")

			By("Remove storage profile ReadWriteMany access mode")
			updateDefaultStorageClassProfileClaimPropertySets(&rwo)

			waitForPrometheusAlert(f, "CDIDefaultStorageClassDegraded")

			By("Verify the alert stops when no default storage class")
			updateDefaultStorageClasses("false")
			waitForNoPrometheusAlert(f, "CDIDefaultStorageClassDegraded")

			By("Verify the alert fires when default storage class is set")
			updateDefaultStorageClasses("true")
			waitForPrometheusAlert(f, "CDIDefaultStorageClassDegraded")

			By("Restore storage profile")
			// A profile that had ReadWriteMany on its own only needs the override removed;
			// otherwise the stub ReadWriteMany override is put back so the alert can clear.
			if profileHasRWX {
				updateDefaultStorageClassProfileClaimPropertySets(nil)
			} else {
				updateDefaultStorageClassProfileClaimPropertySets(&rwx)
			}

			waitForNoPrometheusAlert(f, "CDIDefaultStorageClassDegraded")
		})

		// Creates a storage class that uses storagecapabilities.ProvisionerNoobaa, checks that
		// its profile has no claim property sets, and expects the incomplete-profile count to
		// stay at the value captured in BeforeEach for the whole Consistently window.
		It("[test_id:9659] StorageProfile incomplete metric expected value remains unchanged for provisioner known to not work", func() {
			unsupported := createUnknownStorageClass("unknown-sc-0", storagecapabilities.ProvisionerNoobaa)
			sc, err := f.K8sClient.StorageV1().StorageClasses().Create(context.TODO(), unsupported, metav1.CreateOptions{})
			Expect(err).ToNot(HaveOccurred())
			numAddedStorageClasses = 1

			By("Profile created and is incomplete")
			profile := &cdiv1.StorageProfile{}
			fetchProfile := func() error {
				return f.CrClient.Get(context.TODO(), types.NamespacedName{Name: sc.Name}, profile)
			}
			Eventually(fetchProfile, 20*time.Second, 1*time.Second).Should(Succeed())
			Expect(profile.Status.ClaimPropertySets).To(BeNil())

			By("Metric stays the same because we don't support this provisioner")
			incompleteCount := func() int {
				return countMetricLabelValue(f, "kubevirt_cdi_storageprofile_info", "complete", "false")
			}
			Consistently(incompleteCount, metricConsistentPollingTimeout, metricPollingInterval).Should(BeNumerically("==", originalIncompleteVal))
		})

		// Creates numCrons DataImportCrons, waits for each to report UpToDate, then overwrites
		// its desired-digest annotation with a junk digest and expects
		// kubevirt_cdi_dataimportcron_outdated to grow by one per cron and the
		// CDIDataImportCronOutdated alert to appear. Deleting the crons is expected to bring
		// the metric back down one by one and clear the alert.
		It("[test_id:7964] DataImportCron failing metric expected value when patching DesiredDigest annotation with junk sha256 value", func() {
			numCrons := 2
			// useDefault=true: a metric that is not reported at all counts as 0 here.
			originalCronMetricVal := getMetricValueWithDefault(f, "kubevirt_cdi_dataimportcron_outdated", true)
			Expect(originalCronMetricVal).To(BeZero())

			waitForNoPrometheusAlert(f, "CDIDataImportCronOutdated")

			reg, err := getDataVolumeSourceRegistry(f)
			Expect(err).ToNot(HaveOccurred())
			// Best-effort cleanup of the registry entry; a failure is only logged.
			defer func() {
				if err := utils.RemoveInsecureRegistry(f.CrClient, *reg.URL); err != nil {
					_, _ = fmt.Fprintf(GinkgoWriter, "failed to remove registry; %v", err)
				}
			}()

			// i runs from 1 so it doubles as the expected metric value after each cron.
			for i := 1; i < numCrons+1; i++ {
				cron := utils.NewDataImportCron(fmt.Sprintf("cron-test-%d", i), "5Gi", scheduleOnceAYear, fmt.Sprintf("datasource-test-%d", i), 1, *reg)
				By(fmt.Sprintf("Create new DataImportCron %s", *reg.URL))
				cron, err = f.CdiClient.CdiV1beta1().DataImportCrons(f.Namespace.Name).Create(context.TODO(), cron, metav1.CreateOptions{})
				Expect(err).ToNot(HaveOccurred())

				By("Wait for condition UpToDate=true on DataImportCron")
				Eventually(func() bool {
					cron, err = f.CdiClient.CdiV1beta1().DataImportCrons(f.Namespace.Name).Get(context.TODO(), cron.Name, metav1.GetOptions{})
					Expect(err).ToNot(HaveOccurred())
					upToDateCondition := controller.FindDataImportCronConditionByType(cron, cdiv1.DataImportCronUpToDate)
					return upToDateCondition != nil && upToDateCondition.ConditionState.Status == corev1.ConditionTrue
				}, dataImportCronTimeout, pollingInterval).Should(BeTrue())

				// Get + Update is retried as a unit: a failed Update makes the next poll
				// re-read the cron before trying again.
				Eventually(func() error {
					cron, err = f.CdiClient.CdiV1beta1().DataImportCrons(f.Namespace.Name).Get(context.TODO(), cron.Name, metav1.GetOptions{})
					Expect(err).ToNot(HaveOccurred())
					if cron.Annotations == nil {
						cron.Annotations = make(map[string]string)
					}
					// Not a real image digest (junk sha256 value).

					cron.Annotations[controller.AnnSourceDesiredDigest] = "sha256:c616e1c85568b1f1d528da2b3dbc257fd2035ada441e286a8c42606491442a5d"
					cron, err = f.CdiClient.CdiV1beta1().DataImportCrons(f.Namespace.Name).Update(context.TODO(), cron, metav1.UpdateOptions{})
					return err
				}, dataImportCronTimeout, pollingInterval).Should(Succeed())
				By(fmt.Sprintf("Ensuring metric value incremented to %d", i))
				Eventually(func() int {
					return getMetricValue(f, "kubevirt_cdi_dataimportcron_outdated")
				}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", i))
			}

			waitForPrometheusAlert(f, "CDIDataImportCronOutdated")

			By("Ensure metric value decrements when crons are cleaned up")
			for i := 1; i < numCrons+1; i++ {
				err = f.CdiClient.CdiV1beta1().DataImportCrons(f.Namespace.Name).Delete(context.TODO(), fmt.Sprintf("cron-test-%d", i), metav1.DeleteOptions{})
				Expect(err).ToNot(HaveOccurred())

				// useDefault=true again: a metric that is no longer reported counts as 0.
				Eventually(func() int {
					return getMetricValueWithDefault(f, "kubevirt_cdi_dataimportcron_outdated", true)
				}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", numCrons-i))
			}

			waitForNoPrometheusAlert(f, "CDIDataImportCronOutdated")
		})

		// Scales the cdi-operator deployment to zero, expects
		// cluster:kubevirt_cdi_operator_up:sum to become 0 and the CDIOperatorDown alert to
		// appear, then restores the original replica count.
		It("[test_id:7962] CDIOperatorDown alert firing when operator scaled down", func() {
			deploymentName := "cdi-operator"
			By("Scale down operator so alert will trigger")
			originalReplicas := scaleDeployment(f, deploymentName, 0)

			// A failed assertion below aborts the spec before the restore step; register the
			// restore as cleanup so later specs do not run with the operator scaled to zero.
			restored := false
			DeferCleanup(func() {
				if restored {
					return
				}
				scaleDeployment(f, deploymentName, originalReplicas)
			})

			Eventually(func() bool {
				dep, err := f.K8sClient.AppsV1().Deployments(f.CdiInstallNs).Get(context.TODO(), deploymentName, metav1.GetOptions{})
				Expect(err).ToNot(HaveOccurred())
				return dep.Status.Replicas == 0
			}, 20*time.Second, 1*time.Second).Should(BeTrue())

			By("Waiting for cluster:kubevirt_cdi_operator_up:sum metric to be 0")
			Eventually(func() int {
				return getMetricValue(f, "cluster:kubevirt_cdi_operator_up:sum")
			}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", 0))

			waitForPrometheusAlert(f, "CDIOperatorDown")

			By("Ensuring original value of replicas restored")
			scaleDeployment(f, deploymentName, originalReplicas)
			// Only reached when the scale-up call returned; the cleanup above becomes a no-op.
			restored = true
			err := utils.WaitForDeploymentReplicasReady(f.K8sClient, f.CdiInstallNs, deploymentName)
			Expect(err).ToNot(HaveOccurred())
		})
	})

	Context("Prometheus Rule configuration", func() {
		// Every alerting rule in the "cdi.rules" group must carry annotations that satisfy
		// checkRequiredAnnotations.
		It("[test_id:8259] Alerts should have all the required annotations", func() {
			promRule := getPrometheusRule(f)
			for _, group := range promRule.Spec.Groups {
				if group.Name != "cdi.rules" {
					continue
				}
				for _, rule := range group.Rules {
					// Rules without an alert name are not alerts; skip them.
					if rule.Alert == "" {
						continue
					}
					Expect(rule.Annotations).ToNot(BeNil())
					checkRequiredAnnotations(rule)
				}
			}
		})

		// Every alerting rule in the "cdi.rules" group must carry labels that satisfy
		// checkRequiredLabels.
		It("[test_id:8812] Alerts should have all the required labels", func() {
			promRule := getPrometheusRule(f)
			for _, group := range promRule.Spec.Groups {
				if group.Name != "cdi.rules" {
					continue
				}
				for _, rule := range group.Rules {
					// Rules without an alert name are not alerts; skip them.
					if rule.Alert == "" {
						continue
					}
					Expect(rule.Labels).ToNot(BeNil())
					checkRequiredLabels(rule)
				}
			}
		})
	})
})

// dataVolumeUnusualRestartTest waits for the high-restart import pod count metric to be 1
// and then for the CDIDataVolumeUnusualRestartCount alert to be present.
func dataVolumeUnusualRestartTest(f *framework.Framework) {
	By("Test metric for unusual restart count")
	highRestartReported := func() bool {
		restarts := getMetricValue(f, "cluster:kubevirt_cdi_import_pods_high_restart:count")
		return restarts == 1
	}
	Eventually(highRestartReported, 2*time.Minute, 1*time.Second).Should(BeTrue())

	waitForPrometheusAlert(f, "CDIDataVolumeUnusualRestartCount")
}

// dataVolumeNoUnusualRestartTest waits for the high-restart import pod count metric to be 0
// and then for the CDIDataVolumeUnusualRestartCount alert to be absent.
func dataVolumeNoUnusualRestartTest(f *framework.Framework) {
	By("Test metric for no unusual restart count")
	noHighRestartReported := func() bool {
		restarts := getMetricValue(f, "cluster:kubevirt_cdi_import_pods_high_restart:count")
		return restarts == 0
	}
	Eventually(noHighRestartReported, 2*time.Minute, 1*time.Second).Should(BeTrue())

	waitForNoPrometheusAlert(f, "CDIDataVolumeUnusualRestartCount")
}

// Helper functions

// getMetricValue returns the value of the queried metric, summed over all returned series
// (e.g. for a GaugeVec). A metric that is not reported is never replaced by a default; the
// call keeps polling for it.
func getMetricValue(f *framework.Framework, endpoint string) int {
	const useDefault = false
	return getMetricValueWithDefault(f, endpoint, useDefault)
}

// getMetricValueWithDefault polls the query until it yields a value and returns that value,
// summed over all returned series (e.g. for a GaugeVec).
// If the query returns no series and useDefault is true, it returns 0 right away; with
// useDefault false it keeps polling until the one-minute timeout fails the spec.
func getMetricValueWithDefault(f *framework.Framework, endpoint string, useDefault bool) int {
	var total int

	poll := func() bool {
		samples, err := getMetricDataResult(f, endpoint)
		switch {
		case err != nil:
			return false
		case samples == nil:
			return useDefault
		}

		sum, parsedAny := 0, false
		for _, sample := range samples {
			pair := sample.(map[string]interface{})["value"].([]interface{})
			for _, elem := range pair {
				// Only string elements are parsed; any other element is skipped.
				text, isString := elem.(string)
				if !isString {
					continue
				}
				parsed, convErr := strconv.Atoi(text)
				Expect(convErr).ToNot(HaveOccurred())
				sum += parsed
				parsedAny = true
			}
		}
		total = sum
		return parsedAny
	}
	Eventually(poll, 1*time.Minute, 1*time.Second).Should(BeTrue())

	return total
}

// countMetricLabelValue polls the query until at least one series carrying the given label
// has a parsable value, and returns how many series have that label equal to value while
// their sample value is 1.
func countMetricLabelValue(f *framework.Framework, endpoint, label, value string) int {
	var count int

	poll := func() bool {
		samples, err := getMetricDataResult(f, endpoint)
		if err != nil || samples == nil {
			return false
		}

		matches, sawValue := 0, false
		for _, sample := range samples {
			entry := sample.(map[string]interface{})
			labels := entry["metric"].(map[string]interface{})
			labelVal, present := labels[label]
			if !present {
				continue
			}

			// Only the first string element of the value pair is considered.
			for _, elem := range entry["value"].([]interface{}) {
				text, isString := elem.(string)
				if !isString {
					continue
				}
				parsed, convErr := strconv.Atoi(text)
				Expect(convErr).ToNot(HaveOccurred())
				if parsed == 1 && labelVal == value {
					matches++
				}
				sawValue = true
				break
			}
		}
		count = matches
		return sawValue
	}
	Eventually(poll, 1*time.Minute, 1*time.Second).Should(BeTrue())

	return count
}

// waitForMetricInit looks up the pod with the given prefix in the CDI install namespace and
// polls the query until it returns series and the "instance" label of every returned series
// contains that pod's IP.
func waitForMetricInit(f *framework.Framework, endpoint, podPrefix string) {
	pod, err := utils.FindPodByPrefix(f.K8sClient, f.CdiInstallNs, podPrefix, common.CDILabelSelector)
	Expect(err).NotTo(HaveOccurred())
	podIP := pod.Status.PodIP

	allSeriesFromPod := func() bool {
		samples, err := getMetricDataResult(f, endpoint)
		if err != nil || samples == nil {
			return false
		}
		for _, sample := range samples {
			labels := sample.(map[string]interface{})["metric"].(map[string]interface{})
			instance, isString := labels["instance"].(string)
			if !isString {
				return false
			}
			if !strings.Contains(instance, podIP) {
				return false
			}
		}
		return true
	}
	Eventually(allSeriesFromPod, 5*time.Minute, 5*time.Second).Should(BeTrue())
}

// getMetricDataResult runs the Prometheus query "query?query=<endpoint>" and returns the
// "data.result" list of the JSON response.
//
// It returns (nil, nil) when the result list is empty, and an error when the body cannot be
// read or parsed or when the response lacks the "data" object or the "result" list (for
// example an error response). Callers poll on error, so a malformed response leads to a
// retry instead of a type-assertion panic.
func getMetricDataResult(f *framework.Framework, endpoint string) ([]interface{}, error) {
	resp := f.MakePrometheusHTTPRequest("query?query=" + endpoint)
	defer resp.Body.Close()

	bodyBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	var result map[string]interface{}
	if err = json.Unmarshal(bodyBytes, &result); err != nil {
		return nil, err
	}

	data, ok := result["data"].(map[string]interface{})
	if !ok {
		return nil, fmt.Errorf("prometheus query %q: response has no data object (status %v)", endpoint, result["status"])
	}
	dataResult, ok := data["result"].([]interface{})
	if !ok {
		return nil, fmt.Errorf("prometheus query %q: response data has no result list", endpoint)
	}
	if len(dataResult) == 0 {
		return nil, nil
	}
	return dataResult, nil
}

// createUnknownStorageClass builds (without creating it in the cluster) a storage class with
// the given name and provisioner, the "cdi.kubevirt.io/testing" label and immediate volume
// binding.
func createUnknownStorageClass(name, provisioner string) *storagev1.StorageClass {
	bindingMode := storagev1.VolumeBindingImmediate

	sc := &storagev1.StorageClass{
		Provisioner:       provisioner,
		VolumeBindingMode: &bindingMode,
	}
	sc.ObjectMeta = metav1.ObjectMeta{
		Name:   name,
		Labels: map[string]string{"cdi.kubevirt.io/testing": ""},
	}
	return sc
}

// getPrometheusRule waits up to five minutes for the "prometheus-cdi-rules" PrometheusRule
// in the CDI install namespace to be readable and returns it.
func getPrometheusRule(f *framework.Framework) *promv1.PrometheusRule {
	By("Wait for prometheus-cdi-rules")
	promRule := &promv1.PrometheusRule{}
	promRule.ObjectMeta = metav1.ObjectMeta{
		Name:      "prometheus-cdi-rules",
		Namespace: f.CdiInstallNs,
	}

	fetch := func() error {
		key := crclient.ObjectKeyFromObject(promRule)
		return f.CrClient.Get(context.TODO(), key, promRule)
	}
	Eventually(fetch, 5*time.Minute, 1*time.Second).Should(Succeed())

	return promRule
}

// waitForPrometheusAlert waits until the named alert is reported in state pending or firing.
func waitForPrometheusAlert(f *framework.Framework, alertName string) {
	step := fmt.Sprintf("Wait for alert %s to be triggered", alertName)
	By(step)

	const shouldExist = true
	waitForPrometheusAlertExists(f, alertName, shouldExist)
}

// waitForNoPrometheusAlert waits until the named alert is no longer reported.
func waitForNoPrometheusAlert(f *framework.Framework, alertName string) {
	step := fmt.Sprintf("Verify no %s alert", alertName)
	By(step)

	const shouldExist = false
	waitForPrometheusAlertExists(f, alertName, shouldExist)
}

// waitForPrometheusAlertExists polls checkPrometheusAlertExists once per second, for up to
// two minutes, until it reports that the alert is in the requested state.
func waitForPrometheusAlertExists(f *framework.Framework, alertName string, shouldExist bool) {
	inRequestedState := func() bool {
		return checkPrometheusAlertExists(f, alertName, shouldExist)
	}
	Eventually(inRequestedState, 2*time.Minute, 1*time.Second).Should(BeTrue())
}

// checkPrometheusAlertExists reports whether the alerts endpoint currently agrees with the
// expectation for alertName.
//
// With shouldExist true it returns true when the first alert with that name is in state
// "pending" or "firing". With shouldExist false it returns true when no alert with that name
// is listed. It returns false when the alerts cannot be fetched or the response does not
// have the expected "data.alerts" list, so callers keep polling instead of panicking on a
// malformed response.
func checkPrometheusAlertExists(f *framework.Framework, alertName string, shouldExist bool) bool {
	result, err := getPrometheusAlerts(f)
	if err != nil {
		return false
	}

	data, ok := result["data"].(map[string]interface{})
	if !ok {
		return false
	}
	alerts, ok := data["alerts"].([]interface{})
	if !ok {
		return false
	}

	for _, raw := range alerts {
		alert, ok := raw.(map[string]interface{})
		if !ok {
			continue
		}
		// Indexing a nil map is safe, so an entry without labels simply has no name.
		labels, _ := alert["labels"].(map[string]interface{})
		name, _ := labels["alertname"].(string)
		if name != alertName {
			continue
		}
		if !shouldExist {
			return false
		}
		state, _ := alert["state"].(string)
		By(fmt.Sprintf("Alert %s state %s", name, state))
		return state == "pending" || state == "firing"
	}

	return !shouldExist
}

// getPrometheusAlerts requests the Prometheus "alerts" endpoint and returns the response
// body decoded as a generic JSON object, or the read/decode error.
func getPrometheusAlerts(f *framework.Framework) (alerts map[string]interface{}, err error) {
	resp := f.MakePrometheusHTTPRequest("alerts")
	defer resp.Body.Close()

	var payload []byte
	if payload, err = io.ReadAll(resp.Body); err != nil {
		return alerts, err
	}

	if decodeErr := json.Unmarshal(payload, &alerts); decodeErr != nil {
		return alerts, decodeErr
	}
	return alerts, nil
}

// checkRequiredAnnotations asserts that an alert rule has a non-empty "summary" annotation
// and a "runbook_url" annotation that contains the alert name, and that an HTTP HEAD request
// to the runbook URL answers with 200 OK.
//
// Assertions use offset 1 so failures are reported at the caller's line.
func checkRequiredAnnotations(rule promv1.Rule) {
	ExpectWithOffset(1, rule.Annotations).To(HaveKeyWithValue("summary", Not(BeEmpty())),
		"%s summary is missing or empty", rule.Alert)
	ExpectWithOffset(1, rule.Annotations).To(HaveKey("runbook_url"),
		"%s runbook_url is missing", rule.Alert)
	ExpectWithOffset(1, rule.Annotations).To(HaveKeyWithValue("runbook_url", ContainSubstring(rule.Alert)),
		"%s runbook_url doesn't include alert name", rule.Alert)

	resp, err := http.Head(rule.Annotations["runbook_url"])
	ExpectWithOffset(1, err).ToNot(HaveOccurred(), fmt.Sprintf("%s runbook is not available", rule.Alert))
	// The response body must be closed even for HEAD requests, otherwise the connection
	// is leaked for every rule that is checked.
	defer resp.Body.Close()
	ExpectWithOffset(1, resp.StatusCode).Should(Equal(http.StatusOK), fmt.Sprintf("%s runbook is not available", rule.Alert))
}

// checkRequiredLabels asserts that an alert rule carries a valid "severity" and
// "operator_health_impact" label and the expected "kubernetes_operator_part_of" and
// "kubernetes_operator_component" labels.
//
// Assertions use offset 1 so failures are reported at the caller's line.
func checkRequiredLabels(rule promv1.Rule) {
	// Checked in this order; the first failing entry aborts the spec.
	required := []struct {
		key      string
		expected interface{}
		failure  string
	}{
		{"severity", BeElementOf("info", "warning", "critical"), "%s severity label is missing or not valid"},
		{"operator_health_impact", BeElementOf("none", "warning", "critical"), "%s operator_health_impact label is missing or not valid"},
		{"kubernetes_operator_part_of", "kubevirt", "%s kubernetes_operator_part_of label is missing or not valid"},
		{"kubernetes_operator_component", "containerized-data-importer", "%s kubernetes_operator_component label is missing or not valid"},
	}

	for _, req := range required {
		ExpectWithOffset(1, rule.Labels).To(HaveKeyWithValue(req.key, req.expected), req.failure, rule.Alert)
	}
}

// newStubSnapshotClass builds (without creating it in the cluster) the VolumeSnapshotClass
// named "test-vsc" for the given driver, with the Delete deletion policy.
func newStubSnapshotClass(driver string) *snapshotv1.VolumeSnapshotClass {
	stub := &snapshotv1.VolumeSnapshotClass{
		Driver:         driver,
		DeletionPolicy: snapshotv1.VolumeSnapshotContentDelete,
	}
	stub.ObjectMeta = metav1.ObjectMeta{Name: "test-vsc"}
	return stub
}

// hasRWX reports whether any claim property set in the profile's status lists the
// ReadWriteMany access mode.
func hasRWX(profile *cdiv1.StorageProfile) bool {
	sets := profile.Status.ClaimPropertySets
	for i := range sets {
		modes := sets[i].AccessModes
		for j := range modes {
			if modes[j] != corev1.ReadWriteMany {
				continue
			}
			return true
		}
	}
	return false
}
