# Copyright © The Debusine Developers
# See the AUTHORS file at the top-level directory of this distribution
#
# This file is part of Debusine. It is subject to the license terms
# in the LICENSE file found in the top-level directory of this
# distribution. No part of Debusine, including this file, may be copied,
# modified, propagated, or distributed except according to the terms
# contained in the LICENSE file.

"""lintian workflow."""

from typing import Any

from debian.debian_support import Version
from django.utils import timezone

from debusine.artifacts import LintianArtifact
from debusine.artifacts.models import (
    ArtifactCategory,
    DebianBinaryPackage,
    DebianSourcePackage,
    DebianUpload,
    get_source_package_version,
)
from debusine.db.models import Artifact, ArtifactRelation, WorkRequest
from debusine.server.workflows import workflow_utils
from debusine.server.workflows.models import (
    LintianWorkflowData,
    WorkRequestWorkflowData,
)
from debusine.server.workflows.regression_tracking import (
    RegressionTrackingWorkflow,
)
from debusine.tasks.models import (
    ActionUpdateCollectionWithArtifacts,
    BackendType,
    BaseDynamicTaskData,
    LintianData,
    LintianFailOnSeverity,
    LintianInput,
    LintianOutput,
    LookupMultiple,
    LookupSingle,
    OutputData,
    RegressionAnalysis,
    RegressionAnalysisStatus,
)
from debusine.tasks.server import TaskDatabaseInterface


class LintianWorkflow(
    RegressionTrackingWorkflow[LintianWorkflowData, BaseDynamicTaskData]
):
    """Lintian workflow."""

    TASK_NAME = "lintian"

    def _has_current_reference_qa_result(
        self, *, architecture: str, build_architecture: str
    ) -> bool:
        """
        Return True iff we have a current reference QA result.

        A lintian analysis is outdated if:

        * either the underlying source or binary packages are outdated (i.e.
          have different version numbers) compared to what's available in
          the ``debian:suite`` collection
        * or the lintian version used to perform the analysis is older than
          the version available in the ``debian:suite`` collection

        Otherwise, it is current.
        """
        # This method is only called when update_qa_results is True, in
        # which case these are checked by a model validator.
        assert self.qa_suite is not None
        assert self.reference_qa_results is not None

        source_data = workflow_utils.source_package_data(self)
        latest_result = self.reference_qa_results.manager.lookup(
            f"latest:lintian_{source_data.name}_{architecture}"
        )
        reference_lintian = self.qa_suite.manager.lookup(
            f"binary:lintian_{build_architecture}"
        )
        return (
            latest_result is not None
            and latest_result.artifact is not None
            and latest_result.data["version"] == source_data.version
            and (
                reference_lintian is None
                or Version(
                    LintianArtifact.create_data(
                        latest_result.artifact.data
                    ).summary.lintian_version
                )
                >= Version(reference_lintian.data["version"])
            )
        )

    def populate(self) -> None:
        """Create work requests."""
        architectures = workflow_utils.get_architectures(
            self, self.data.binary_artifacts
        )

        if (data_archs := self.data.architectures) is not None:
            architectures.intersection_update(data_archs)

        if architectures != {"all"}:
            architectures = architectures - {"all"}

        if not architectures and not self.data.binary_artifacts:
            # There are no architectures to run on, but that's because the
            # workflow was started with an empty `binary_artifacts`.  In
            # that case there's no point waiting for binary packages to be
            # built, so we might as well just analyze the source package.
            architectures = {"all"}

        # Pick a preferred architecture to produce the common source and
        # binary-all analysis artifacts, to avoid redundancy.  Note that we
        # still pass source and binary-all artifacts to all child work
        # requests so that `lintian` has the best tag coverage available.
        source_all_analysis_architecture: str | None = None
        if "all" in architectures:
            source_all_analysis_architecture = "all"
        elif self.data.arch_all_build_architecture in architectures:
            source_all_analysis_architecture = (
                self.data.arch_all_build_architecture
            )
        elif architectures:
            # Alphabetical sorting doesn't necessarily produce good results
            # here, but this is already a last-ditch fallback case so we
            # don't worry about it too much.  If it turns out to be wrong,
            # it's probably best to just set `arch_all_build_architecture`.
            source_all_analysis_architecture = sorted(architectures)[0]

        environment = f"{self.data.vendor}/match:codename={self.data.codename}"

        for arch in sorted(architectures):
            output = self.data.output.copy()
            if arch != source_all_analysis_architecture:
                output.source_analysis = False
                output.binary_all_analysis = False

            if (
                not output.source_analysis
                and not output.binary_all_analysis
                and not output.binary_any_analysis
            ):
                continue

            source_artifact = (
                workflow_utils.locate_debian_source_package_lookup(
                    self, "source_artifact", self.data.source_artifact
                )
            )
            filtered_binary_artifacts = (
                workflow_utils.filter_artifact_lookup_by_arch(
                    self, self.data.binary_artifacts, (arch, "all")
                )
            )

            self._populate_lintian(
                source_artifact=source_artifact,
                binary_artifacts=filtered_binary_artifacts,
                output=output,
                build_architecture=(
                    self.data.arch_all_build_architecture
                    if arch == "all"
                    else arch
                ),
                environment=environment,
                backend=self.data.backend,
                architecture=arch,
                include_tags=self.data.include_tags,
                exclude_tags=self.data.exclude_tags,
                fail_on_severity=self.data.fail_on_severity,
                target_distribution=f"{self.data.vendor}:{self.data.codename}",
            )

    def _populate_lintian(
        self,
        *,
        source_artifact: LookupSingle,
        binary_artifacts: LookupMultiple,
        output: LintianOutput,
        build_architecture: str,
        environment: str,
        backend: BackendType,
        include_tags: list[str],
        exclude_tags: list[str],
        fail_on_severity: LintianFailOnSeverity,
        architecture: str,
        target_distribution: str,
    ) -> None:
        """Create work request for Lintian for a specific architecture."""
        if (
            self.data.update_qa_results
            and self._has_current_reference_qa_result(
                architecture=architecture, build_architecture=build_architecture
            )
        ):
            return

        workflow_data_kwargs: dict[str, Any] = {}
        if self.data.update_qa_results:
            # When updating reference results for regression tracking, task
            # failures never cause the parent workflow or dependent tasks to
            # fail.
            workflow_data_kwargs["allow_failure"] = True
        wr = self.work_request_ensure_child_worker(
            task_name="lintian",
            task_data=LintianData(
                input=LintianInput(
                    source_artifact=source_artifact,
                    binary_artifacts=binary_artifacts,
                ),
                output=output,
                build_architecture=build_architecture,
                environment=environment,
                backend=backend,
                include_tags=include_tags,
                exclude_tags=exclude_tags,
                fail_on_severity=fail_on_severity,
                target_distribution=target_distribution,
            ),
            workflow_data=WorkRequestWorkflowData(
                display_name=f"Lintian for {architecture}",
                step=f"lintian-{architecture}",
                **workflow_data_kwargs,
            ),
        )
        self.requires_artifact(wr, source_artifact)
        self.requires_artifact(wr, binary_artifacts)
        promise_name = f"{self.data.prefix}lintian-{architecture}"
        self.provides_artifact(wr, ArtifactCategory.LINTIAN, promise_name)

        if self.data.update_qa_results:
            # Checked by a model validator.
            assert self.data.reference_qa_results is not None

            source_data = workflow_utils.source_package_data(self)

            # Back off if another workflow gets there first.
            self.skip_if_qa_result_changed(
                wr,
                package=source_data.name,
                architecture=architecture,
                promise_name=promise_name,
            )

            # Record results in the reference collection.
            action = ActionUpdateCollectionWithArtifacts(
                collection=self.data.reference_qa_results,
                variables={
                    "package": source_data.name,
                    "version": source_data.version,
                    # While this field is technically optional at the
                    # moment, it will be present in any newly-created
                    # artifacts.
                    "$architecture": "architecture",
                    "timestamp": int(
                        (self.qa_suite_changed or timezone.now()).timestamp()
                    ),
                    "work_request_id": wr.id,
                },
                artifact_filters={"category": ArtifactCategory.LINTIAN},
            )
            wr.add_event_reaction("on_success", action)
            wr.add_event_reaction("on_failure", action)

        if self.data.enable_regression_tracking:
            # Checked by a model validator.
            assert self.data.reference_prefix

            regression_analysis = self.work_request_ensure_child_internal(
                task_name="workflow",
                workflow_data=WorkRequestWorkflowData(
                    allow_dependency_failures=True,
                    step="regression-analysis",
                    display_name=f"Regression analysis for {architecture}",
                    visible=False,
                ),
            )
            try:
                self.requires_artifact(
                    regression_analysis,
                    f"internal@collections/name:"
                    f"{self.data.reference_prefix}lintian-{architecture}",
                )
            except KeyError:
                pass
            regression_analysis.add_dependency(wr)
            self.orchestrate_child(regression_analysis)

    def build_dynamic_data(
        self, task_database: TaskDatabaseInterface  # noqa: U100
    ) -> BaseDynamicTaskData:
        """
        Compute dynamic data for this workflow.

        :subject: package name of ``source_artifact``
        """
        source_data = workflow_utils.source_package_data(self)
        return BaseDynamicTaskData(
            subject=source_data.name,
            parameter_summary=f"{source_data.name}_{source_data.version}",
        )

    def _extract_artifact_details(
        self, artifact: Artifact | None
    ) -> tuple[str | None, str | None, WorkRequest | None]:
        """Extract details from a ``debian:lintian`` artifact."""
        source_version: str | None
        url: str | None
        wr: WorkRequest | None
        if artifact is None:
            source_version = None
            url = None
            wr = None
        else:
            analyzed_artifact = (
                artifact.relations.filter(
                    type=ArtifactRelation.Relations.RELATES_TO
                )
                .earliest("id")
                .target
            )
            analyzed_data = analyzed_artifact.create_data()
            assert isinstance(
                analyzed_data,
                (
                    DebianSourcePackage,
                    DebianBinaryPackage,
                    DebianUpload,
                ),
            )
            source_version = get_source_package_version(analyzed_data)
            url = artifact.get_absolute_url()
            wr = artifact.created_by_work_request
        return source_version, url, wr

    @staticmethod
    def compare_qa_results_fine_grained(
        reference: Artifact | None, new: Artifact | None
    ) -> tuple[RegressionAnalysisStatus, dict[str, list[str]]]:
        """Do a fine-grained comparison of two QA results."""
        reference_summary = (
            {} if reference is None else reference.data["summary"]
        )
        reference_count = reference_summary.get("tags_count_by_severity", {})
        reference_found = reference_summary.get("tags_found", [])
        new_summary = {} if new is None else new.data["summary"]
        new_count = new_summary.get("tags_count_by_severity", {})
        new_found = new_summary.get("tags_found", [])

        severities = ("warning", "error")
        if any(
            new_count.get(severity, 0) > reference_count.get(severity, 0)
            for severity in severities
        ):
            status = RegressionAnalysisStatus.REGRESSION
        elif any(
            new_count.get(severity, 0) < reference_count.get(severity, 0)
            for severity in severities
        ):
            status = RegressionAnalysisStatus.IMPROVEMENT
        else:
            status = RegressionAnalysisStatus.STABLE

        details = {
            "added_tags": sorted(set(new_found) - set(reference_found)),
            "removed_tags": sorted(set(reference_found) - set(new_found)),
        }

        return status, details

    def callback_regression_analysis(self) -> bool:
        """
        Analyze regressions compared to reference results.

        This is called once for each architecture, but updates the whole
        analysis for all architectures each time.  This is partly for
        simplicity and robustness (we don't need to work out how to combine
        the new analysis with a previous one), and partly to make it easier
        to handle cases where there isn't a one-to-one mapping between the
        reference results and the new results.
        """
        # Select the newest result for each architecture.
        reference_artifacts = self.find_reference_artifacts("lintian")
        new_artifacts = self.find_new_artifacts(
            "lintian", ArtifactCategory.LINTIAN
        )

        output_data = self.work_request.output_data or OutputData()
        output_data.regression_analysis = {}
        for architecture in sorted(
            set(reference_artifacts) | set(new_artifacts)
        ):
            reference = reference_artifacts.get(architecture)
            reference_source_version, reference_url, reference_wr = (
                self._extract_artifact_details(reference)
            )
            new = new_artifacts.get(architecture)
            new_source_version, new_url, new_wr = (
                self._extract_artifact_details(new)
            )

            # A single Lintian task may produce artifacts for multiple
            # architectures (e.g. amd64+all); that may result in regressions
            # or improvements at the level of work request statuses being
            # over-reported against all those architectures rather than just
            # the affected one.  We can live with this since it doesn't
            # change the combined status across all architectures.
            status = self.compare_qa_results(reference_wr, new_wr)
            fine_grained_status, details = self.compare_qa_results_fine_grained(
                reference, new
            )
            if status == RegressionAnalysisStatus.STABLE:
                status = fine_grained_status

            output_data.regression_analysis[architecture] = RegressionAnalysis(
                original_source_version=reference_source_version,
                original_url=reference_url,
                new_source_version=new_source_version,
                new_url=new_url,
                status=status,
                details=details,
            )

        self.work_request.output_data = output_data
        self.work_request.save()
        return True
