"""Automatic discovery of Python modules and packages (for inclusion in the
distribution) and other config values.

For the purposes of this module, the following nomenclature is used:

- "src-layout": a directory representing a Python project that contains a "src"
  folder. Everything under the "src" folder is meant to be included in the
  distribution when packaging the project. Example::

    .
    ├── tox.ini
    ├── pyproject.toml
    └── src/
        └── mypkg/
            ├── __init__.py
            ├── mymodule.py
            └── my_data_file.txt

- "flat-layout": a Python project that does not use "src-layout" but instead
  has a directory under the project root for each package::

    .
    ├── tox.ini
    ├── pyproject.toml
    └── mypkg/
        ├── __init__.py
        ├── mymodule.py
        └── my_data_file.txt

- "single-module": a project that contains a single Python script directly
  under the project root (no directory used)::

    .
    ├── tox.ini
    ├── pyproject.toml
    └── mymodule.py

"""

from __future__ import annotations

import itertools
import os
from collections.abc import Iterable, Iterator, Mapping
from fnmatch import fnmatchcase
from glob import glob
from pathlib import Path
from typing import TYPE_CHECKING, ClassVar

import _distutils_hack.override  # noqa: F401

from ._path import StrPath

from distutils import log
from distutils.util import convert_path

if TYPE_CHECKING:
    from setuptools import Distribution

# Shorthand for flattening one level of nesting: takes an iterable of
# iterables and lazily yields the items of each inner iterable in turn.
chain_iter = itertools.chain.from_iterable


def _valid_name(path: StrPath) -> bool:
    """Tell whether the last component of ``path`` is a valid identifier.

    Names that are not identifiers cannot be imported directly, so callers
    use this to ignore them.
    """
    _parent, leaf = os.path.split(path)
    return leaf.isidentifier()


class _Filter:
    """
    Callable built from a collection of shell-style patterns.

    Calling the instance with a string is true when that string matches at
    least one pattern (case-sensitive ``fnmatch`` semantics), while the ``in``
    operator checks whether a string is literally one of the patterns.
    """

    def __init__(self, *patterns: str) -> None:
        # Dict keys keep insertion order and drop duplicated patterns.
        self._patterns = {pattern: None for pattern in patterns}

    def __call__(self, item: str) -> bool:
        for pattern in self._patterns:
            if fnmatchcase(item, pattern):
                return True
        return False

    def __contains__(self, item: str) -> bool:
        # Literal membership test: no wildcard expansion takes place here.
        return item in self._patterns


class _Finder:
    """Base class that exposes functionality for module/package finders"""

    # Patterns excluded on every search, on top of the ones given by the caller.
    ALWAYS_EXCLUDE: ClassVar[tuple[str, ...]] = ()
    # Patterns used when the caller passes no (or an empty) ``exclude``.
    DEFAULT_EXCLUDE: ClassVar[tuple[str, ...]] = ()

    @classmethod
    def find(
        cls,
        where: StrPath = '.',
        exclude: Iterable[str] = (),
        include: Iterable[str] = ('*',),
    ) -> list[str]:
        """List every Python item (packages or modules, according to the
        concrete finder) located inside the directory ``where``.

        ``where`` is the root directory of the search. It is expected to be a
        "cross-platform" (i.e. URL-style) path, which gets converted to the
        local path syntax before being used.

        ``exclude`` is a sequence of names to leave out; ``*`` works as a
        wildcard inside the names.
        When searching for packages, ``foo.*`` leaves out all subpackages of
        ``foo`` (but not ``foo`` itself).
        When empty, the finder's ``DEFAULT_EXCLUDE`` is used instead.

        ``include`` is a sequence of names to keep.
        When given, only items matching it are returned; the default (``*``)
        keeps every item found.
        It accepts shell style wildcard patterns, exactly like ``exclude``.
        """

        root = convert_path(str(where))
        rejected = _Filter(*cls.ALWAYS_EXCLUDE, *(exclude or cls.DEFAULT_EXCLUDE))
        accepted = _Filter(*include)
        return list(cls._find_iter(root, rejected, accepted))

    @classmethod
    def _find_iter(
        cls, where: StrPath, exclude: _Filter, include: _Filter
    ) -> Iterator[str]:
        # Concrete finders provide the actual search.
        raise NotImplementedError


class PackageFinder(_Finder):
    """
    Generate a list of all Python packages found within a directory
    """

    ALWAYS_EXCLUDE = ("ez_setup", "*__pycache__")

    @classmethod
    def _find_iter(
        cls, where: StrPath, exclude: _Filter, include: _Filter
    ) -> Iterator[str]:
        """
        Yield the dotted name of each package below 'where' that is accepted
        by the 'include' filter and not rejected by the 'exclude' filter.
        """
        for base, subdirs, _files in os.walk(str(where), followlinks=True):
            # Take a snapshot of the sub-directories and empty the list that
            # ``os.walk`` will descend into; only the entries appended back
            # below are visited.
            candidates = list(subdirs)
            subdirs.clear()

            for dirname in candidates:
                location = os.path.join(base, dirname)
                relative = os.path.relpath(location, where)
                package = relative.replace(os.path.sep, '.')

                # Directory trees that are not valid packages are dropped
                # entirely (they are not descended into either).
                if '.' in dirname:
                    continue
                if not cls._looks_like_package(location, package):
                    continue

                # Report the package when the filters allow it
                if include(package) and not exclude(package):
                    yield package

                # Early pruning: a literal ``<package>*`` or ``<package>.*``
                # exclusion pattern means nothing below can be selected.
                prunable = f"{package}*" in exclude or f"{package}.*" in exclude
                if not prunable:
                    # Sub-directories may still hold packages, even when the
                    # parent itself was excluded, so keep walking.
                    subdirs.append(dirname)

    @staticmethod
    def _looks_like_package(path: StrPath, _package_name: str) -> bool:
        """Tell whether ``path`` contains an ``__init__.py`` file."""
        init_file = os.path.join(path, '__init__.py')
        return os.path.isfile(init_file)


class PEP420PackageFinder(PackageFinder):
    """Variant of ``PackageFinder`` that does not require ``__init__.py``.

    The package check is replaced by one that accepts every directory it is
    asked about, so directories without an ``__init__.py`` file are also
    reported as packages.
    """

    @staticmethod
    def _looks_like_package(_path: StrPath, _package_name: str) -> bool:
        # Both arguments are ignored: any directory is accepted.
        return True


class ModuleFinder(_Finder):
    """Find isolated Python modules.

    Only ``*.py`` files located directly inside the searched directory are
    considered; subdirectories are **not** recursed into.
    """

    @classmethod
    def _find_iter(
        cls, where: StrPath, exclude: _Filter, include: _Filter
    ) -> Iterator[str]:
        """Yield the name of each module found directly inside ``where``.

        A module is reported when its file name (without extension) passes
        ``_looks_like_module``, matches ``include`` and does not match
        ``exclude``.
        """
        from glob import escape as glob_escape

        # ``where`` is a literal directory, not a pattern: escape it so that
        # glob metacharacters in the path (e.g. a folder named ``my[proj]``)
        # are not interpreted as wildcards or character classes.
        pattern = os.path.join(glob_escape(os.fspath(where)), "*.py")

        for file in glob(pattern):
            module, _ext = os.path.splitext(os.path.basename(file))

            # Ignore file names that cannot be imported directly
            if not cls._looks_like_module(module):
                continue

            if include(module) and not exclude(module):
                yield module

    _looks_like_module = staticmethod(_valid_name)


# We have to be extra careful in the case of flat layout to not include files
# and directories not meant for distribution (e.g. tool-related)


class FlatLayoutPackageFinder(PEP420PackageFinder):
    """Package finder with default exclusions suited to a flat-layout, where
    tool-related directories live next to the packages in the project root.
    """

    _EXCLUDE = (
        "ci",
        "bin",
        "debian",
        "doc",
        "docs",
        "documentation",
        "manpages",
        "news",
        "newsfragments",
        "changelog",
        "test",
        "tests",
        "unit_test",
        "unit_tests",
        "example",
        "examples",
        "scripts",
        "tools",
        "util",
        "utils",
        "python",
        "build",
        "dist",
        "venv",
        "env",
        "requirements",
        # ---- Task runners / Build tools ----
        "tasks",  # invoke
        "fabfile",  # fabric
        "site_scons",  # SCons
        # ---- Other tools ----
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        "htmlcov",  # Coverage.py
        # ---- Hidden directories/Private packages ----
        "[._]*",
    )

    # Each reserved name is followed by the pattern covering its subpackages.
    DEFAULT_EXCLUDE = tuple(
        pattern for name in _EXCLUDE for pattern in (name, f"{name}.*")
    )
    """Reserved package names"""

    @staticmethod
    def _looks_like_package(_path: StrPath, package_name: str) -> bool:
        """Accept dotted names made of identifiers only; the root component
        may alternatively end in ``-stubs``.
        """
        root, *nested = package_name.split('.')
        # Consider PEP 561
        if not (root.isidentifier() or root.endswith("-stubs")):
            return False
        return all(part.isidentifier() for part in nested)


class FlatLayoutModuleFinder(ModuleFinder):
    """Module finder whose default exclusion patterns list module names
    reserved for tool-related scripts, plus hidden/private modules.
    """

    DEFAULT_EXCLUDE = (
        "setup",
        "conftest",
        "test",
        "tests",
        "example",
        "examples",
        "build",
        # ---- Task runners ----
        "toxfile",
        "noxfile",
        "pavement",
        "dodo",
        "tasks",
        "fabfile",
        # ---- Other tools ----
        "[Ss][Cc]onstruct",  # SCons
        "conanfile",  # Conan: C/C++ build tool
        "manage",  # Django
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        # ---- Hidden files/Private modules ----
        "[._]*",
    )
    """Reserved top-level module names"""


def _find_packages_within(root_pkg: str, pkg_dir: StrPath) -> list[str]:
    """Return ``root_pkg`` followed by every package found inside ``pkg_dir``,
    each one qualified with the ``root_pkg.`` prefix.
    """
    subpackages = PEP420PackageFinder.find(pkg_dir)
    return [root_pkg, *(f"{root_pkg}.{name}" for name in subpackages)]


class ConfigDiscovery:
    """Fill-in metadata and options that can be automatically derived
    (from other metadata/options, the file system or conventions)
    """

    def __init__(self, distribution: Distribution) -> None:
        self.dist = distribution
        # Flags consulted by ``__call__`` and ``_explicitly_specified``
        self._called = False
        self._disabled = False
        self._skip_ext_modules = False

    def _disable(self):
        """Internal API to disable automatic discovery"""
        self._disabled = True

    def _ignore_ext_modules(self):
        """Internal API to disregard ext_modules.

        Auto-discovery is normally not triggered when ``ext_modules`` are set
        (backward compatibility with existing packages relying on ``setup.py``
        or ``setup.cfg``). ``setuptools`` can call this function so that the
        given ``ext_modules`` are ignored and auto-discovery proceeds whenever
        ``packages`` and ``py_modules`` are not given (e.g. when using
        pyproject.toml metadata).
        """
        self._skip_ext_modules = True

    @property
    def _root_dir(self) -> StrPath:
        # Ideally `_root_dir` is only used after `src_root` is set in dist;
        # the current directory is the fallback.
        return self.dist.src_root or os.curdir

    @property
    def _package_dir(self) -> dict[str, str]:
        configured = self.dist.package_dir
        return {} if configured is None else configured

    def __call__(
        self, force: bool = False, name: bool = True, ignore_ext_modules: bool = False
    ) -> None:
        """Discover missing configuration fields automatically, modifying
        the given ``distribution`` object in-place.

        By default only the first call on a ``ConfigDiscovery`` object has
        any effect, and no call has effect once discovery was disabled.

        To run the discovery again (e.g. when the project directory changes),
        pass ``force=True`` (or create a new ``ConfigDiscovery`` instance).

        ``name=False`` skips deriving the distribution name, and
        ``ignore_ext_modules`` is forwarded to the layout analysis.
        """
        nothing_to_do = self._called or self._disabled
        if nothing_to_do and force is False:
            # Avoid overhead of multiple calls
            return

        self._analyse_package_layout(ignore_ext_modules)
        if name:
            # depends on ``packages`` and ``py_modules``
            self.analyse_name()

        self._called = True

    def _explicitly_specified(self, ignore_ext_modules: bool) -> bool:
        """``True`` if the user has specified some form of package/module listing"""
        dist = self.dist
        skip_ext = ignore_ext_modules or self._skip_ext_modules
        has_ext_modules = dist.ext_modules is not None and not skip_ext

        if dist.packages is not None or dist.py_modules is not None or has_ext_modules:
            return True

        # Some projects use numpy.distutils.misc_util.Configuration
        return hasattr(dist, "configuration") and dist.configuration

    def _analyse_package_layout(self, ignore_ext_modules: bool) -> bool:
        """Try each layout analysis in turn, unless the user already listed
        packages/modules. ``True`` when nothing else needs to be done.
        """
        if self._explicitly_specified(ignore_ext_modules):
            # For backward compatibility, just try to find modules/packages
            # when nothing is given
            return True

        log.debug(
            "No `packages` or `py_modules` configuration, performing "
            "automatic discovery."
        )

        strategies = (
            self._analyse_explicit_layout,
            self._analyse_src_layout,
            # flat-layout is the trickiest for discovery so it should be last
            self._analyse_flat_layout,
        )
        # Stops at the first strategy that succeeds
        return any(strategy() for strategy in strategies)

    def _analyse_explicit_layout(self) -> bool:
        """The user can explicitly give a package layout via ``package_dir``"""
        # Work on a separate dict (the original must not be modified) without
        # the "" entry, which falls under the "src-layout" umbrella.
        package_dir = {
            pkg: location for pkg, location in self._package_dir.items() if pkg != ""
        }
        root_dir = self._root_dir

        if not package_dir:
            return False

        log.debug(f"`explicit-layout` detected -- analysing {package_dir}")
        discovered: list[str] = []
        for pkg, parent_dir in package_dir.items():
            location = os.path.join(root_dir, parent_dir)
            discovered.extend(_find_packages_within(pkg, location))
        self.dist.packages = discovered
        log.debug(f"discovered packages -- {self.dist.packages}")
        return True

    def _analyse_src_layout(self) -> bool:
        """Look for packages and modules under the ``src`` directory
        (or whatever ``package_dir[""]`` points to).

        The "src-layout" is relatively safe for automatic discovery:
        everything inside the directory is assumed to be meant for the
        distribution.

        When ``package_dir[""]`` is not given, but the ``src`` directory
        exists, ``package_dir[""] = "src"`` is set by this function.
        """
        package_dir = self._package_dir
        src_dir = os.path.join(self._root_dir, package_dir.get("", "src"))
        if not os.path.isdir(src_dir):
            return False

        log.debug(f"`src-layout` detected -- analysing {src_dir}")
        if "" not in package_dir:
            package_dir[""] = os.path.basename(src_dir)
        # persist eventual modifications
        self.dist.package_dir = package_dir
        self.dist.packages = PEP420PackageFinder.find(src_dir)
        self.dist.py_modules = ModuleFinder.find(src_dir)
        log.debug(f"discovered packages -- {self.dist.packages}")
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        return True

    def _analyse_flat_layout(self) -> bool:
        """Look for packages, then for modules, under the project root.

        The ``flat-layout`` is more dangerous in terms of accidentally
        including extra files/directories, so this function is more
        conservative: an error is raised when multiple top-level packages or
        modules are found.

        Multi-package dists are assumed to be uncommon, and that use case is
        left unsupported in order to prevent unintended errors.
        """
        log.debug(f"`flat-layout` detected -- analysing {self._root_dir}")
        if self._analyse_flat_packages():
            return True
        return self._analyse_flat_modules()

    def _analyse_flat_packages(self) -> bool:
        self.dist.packages = FlatLayoutPackageFinder.find(self._root_dir)
        # Stubs and nested packages do not count as separate top-level items
        without_stubs = remove_stubs(self.dist.packages)
        top_level = remove_nested_packages(without_stubs)
        log.debug(f"discovered packages -- {self.dist.packages}")
        self._ensure_no_accidental_inclusion(top_level, "packages")
        return bool(top_level)

    def _analyse_flat_modules(self) -> bool:
        self.dist.py_modules = FlatLayoutModuleFinder.find(self._root_dir)
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        self._ensure_no_accidental_inclusion(self.dist.py_modules, "modules")
        found_any = bool(self.dist.py_modules)
        return found_any

    def _ensure_no_accidental_inclusion(self, detected: list[str], kind: str):
        """Raise ``PackageDiscoveryError`` when more than one item of the
        given ``kind`` was detected; otherwise do nothing.
        """
        if len(detected) > 1:
            # Imported lazily: only needed on the error path
            from inspect import cleandoc

            from setuptools.errors import PackageDiscoveryError

            msg = f"""Multiple top-level {kind} discovered in a flat-layout: {detected}.

            To avoid accidental inclusion of unwanted files or directories,
            setuptools will not proceed with this build.

            If you are trying to create a single distribution with multiple {kind}
            on purpose, you should not rely on automatic discovery.
            Instead, consider the following options:

            1. set up custom discovery (`find` directive with `include` or `exclude`)
            2. use a `src-layout`
            3. explicitly set `py_modules` or `packages` with a list of names

            To find more information, look for "package discovery" on setuptools docs.
            """
            raise PackageDiscoveryError(cleandoc(msg))

    def analyse_name(self) -> None:
        """Derive the distribution name from the packages/modules (the
        essential contribution of the author) when no name is configured.
        """
        already_named = self.dist.metadata.name or self.dist.name
        if already_named:
            # get_name() is not reliable (can return "UNKNOWN")
            return

        log.debug("No `name` configuration, performing automatic discovery")

        derived = self._find_name_single_package_or_module()
        if not derived:
            derived = self._find_name_from_packages()
        if derived:
            self.dist.metadata.name = derived

    def _find_name_single_package_or_module(self) -> str | None:
        """Exactly one module or package"""
        for attr in ('packages', 'py_modules'):
            listed = getattr(self.dist, attr, None) or []
            if len(listed) != 1:
                continue
            log.debug(f"Single module/package detected, name: {listed[0]}")
            return listed[0]

        return None

    def _find_name_from_packages(self) -> str | None:
        """Try to find the root package that is not a PEP 420 namespace"""
        discovered = self.dist.packages
        if not discovered:
            return None

        shortest_first = sorted(discovered, key=len)
        candidates = remove_stubs(shortest_first)
        package_dir = self.dist.package_dir or {}

        parent_pkg = find_parent_package(candidates, package_dir, self._root_dir)
        if not parent_pkg:
            log.warn("No parent package detected, impossible to derive `name`")
            return None

        log.debug(f"Common parent package detected, name: {parent_pkg}")
        return parent_pkg


def remove_nested_packages(packages: list[str]) -> list[str]:
    """Keep only the packages that are not nested inside another listed
    package. The result is ordered by name length (shortest first).

    >>> remove_nested_packages(["a", "a.b1", "a.b2", "a.b1.c1"])
    ['a']
    >>> remove_nested_packages(["a", "b", "c.d", "c.d.e.f", "g.h", "a.a1"])
    ['a', 'b', 'c.d', 'g.h']
    """
    by_length = sorted(packages, key=len)

    def is_nested(candidate: str) -> bool:
        # A parent is always strictly shorter than its children, so checking
        # against the complete list is enough to spot nesting.
        return any(candidate.startswith(f"{parent}.") for parent in by_length)

    return [pkg for pkg in by_length if not is_nested(pkg)]


def remove_stubs(packages: list[str]) -> list[str]:
    """Drop type stubs (:pep:`561`), i.e. every package whose root component
    ends with ``-stubs``, from a list of packages.

    >>> remove_stubs(["a", "a.b", "a-stubs", "a-stubs.b.c", "b", "c-stubs"])
    ['a', 'a.b', 'b']
    """
    kept = []
    for pkg in packages:
        root, _dot, _rest = pkg.partition(".")
        if not root.endswith("-stubs"):
            kept.append(pkg)
    return kept


def find_parent_package(
    packages: list[str], package_dir: Mapping[str, str], root_dir: StrPath
) -> str | None:
    """Find the parent package that is not a namespace.

    Returns the shortest package that is an ancestor of all the other ones
    and has an ``__init__.py`` file on disk, or ``None`` if there is no such
    package.
    """
    ordered = sorted(packages, key=len)
    ancestors = []
    for position, candidate in enumerate(ordered):
        prefix = f"{candidate}."
        longer_names = ordered[position + 1 :]
        if any(not other.startswith(prefix) for other in longer_names):
            # Names are ordered by length, so collecting stops at the first
            # one that is not a parent of everything that follows.
            # On divergence (e.g. multiple root packages) nothing is collected
            break
        ancestors.append(candidate)

    for candidate in ancestors:
        location = find_package_path(candidate, package_dir, root_dir)
        if os.path.isfile(os.path.join(location, "__init__.py")):
            return candidate

    return None


def find_package_path(
    name: str, package_dir: Mapping[str, str], root_dir: StrPath
) -> str:
    """Compute the location on disk where the package ``name`` is expected,
    taking the ``package_dir`` option into account.

    >>> path = find_package_path("my.pkg", {"": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/my/pkg'

    >>> path = find_package_path("my.pkg", {"my": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/pkg'

    >>> path = find_package_path("my.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested'

    >>> path = find_package_path("other.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './other/pkg'
    """
    segments = name.split(".")
    depth = len(segments)
    while depth > 0:
        # Longest prefix first: the most specific package_dir entry wins
        prefix = ".".join(segments[:depth])
        if prefix in package_dir:
            remainder = segments[depth:]
            return os.path.join(root_dir, package_dir[prefix], *remainder)
        depth -= 1

    # No entry for the package or its parents: use the one for the root ("")
    base = package_dir.get("") or ""
    return os.path.join(root_dir, *base.split("/"), *segments)


def construct_package_dir(packages: list[str], package_path: StrPath) -> dict[str, str]:
    """Map each non-nested package in ``packages`` to a ``/``-separated
    location made of the parts of ``package_path`` followed by the components
    of the package name.
    """
    top_level = remove_nested_packages(packages)
    base_parts = Path(package_path).parts
    mapping = {}
    for pkg in top_level:
        mapping[pkg] = "/".join((*base_parts, *pkg.split(".")))
    return mapping
