Source code for deluxe.sequences

# Copyright (c) 2024 - Gilles Coissac
# This file is part of standard-deluxe library.
#
# standard-deluxe is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published
# by the Free Software Foundation, either version 3 of the License,
# or (at your option) any later version.
#
# standard-deluxe is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with standard-deluxe. If not, see <https://www.gnu.org/licenses/>
#
# ruff: noqa: PYI025, PYI019, PLW1641, ARG002, PLR1711, FURB180
"""Ordered set data structures and sequence utilities.

This module provides ordered set implementations that preserve insertion
order while supporting the full :class:`~collections.abc.Set` protocol.
Two variants are provided:

* :class:`OrderedFrozenSet` — an immutable, hashable ordered set.
* :class:`OrderedSet` — a mutable ordered set supporting in-place updates.

The :func:`dedup_list` helper deduplicates a sequence while preserving
element order.
"""

from __future__ import annotations

from abc import ABCMeta
from collections.abc import Hashable, Iterator, MutableSet, Sequence, Set
from copy import copy
from functools import reduce
from operator import (
    and_,  # pyright: ignore[reportUnknownVariableType]
    ior,  # pyright: ignore[reportUnknownVariableType]
    isub,  # pyright: ignore[reportUnknownVariableType]
    ixor,  # pyright: ignore[reportUnknownVariableType]
    or_,  # pyright: ignore[reportUnknownVariableType]
    sub,
    xor,  # pyright: ignore[reportUnknownVariableType]
)
from typing import TYPE_CHECKING, ClassVar, Generic, Literal, TypeVar, cast


if TYPE_CHECKING:
    from collections.abc import Iterable

# NOTE: python operator module is poorly typed, this will cause
#       a lot of type casting and type ignore comment.
#       See: https://github.com/python/typeshed/issues/15611
#
# TODO: Implements sequence module in C

__ALL__ = ("OrderedFrozenSet", "OrderedSet", "dedup_list")  # pragma: no cover


_V = TypeVar("_V")
_OST = TypeVar("_OST")


[docs] def dedup_list(iterable: Iterable[object], lifo: bool = False) -> list[object]: """Return a list of distinct items from iterable. By default first element will be kept at their index removing further duplicate occurrences. Setting lifo to True will inverse this behavior. Args: iterable (:class:`Iterable[object]`): The input Sequence. lifo (bool, optional): If True, return the distinct elements in Last-In-First-Out order. Defaults to False. Returns: list: A new list containing only distinct items. """ unique_: list[object] = [] list_ = list(iterable)[::-1] if lifo else list(iterable) for v in list_: if v not in unique_: unique_.append(v) return unique_[::-1] if lifo else unique_
class _AbstractOrderedSet(Set[_V], Generic[_V], metaclass=ABCMeta): # pragma: no cover __slots__: ClassVar[tuple[str, ...]] = ("_map",) def __new__(cls: type[_OST], iterable: Iterable[_V] | None = None) -> _OST: instance = super().__new__(cls) # pyright: ignore[reportArgumentType] instance._map = {k: i for i, k in enumerate(iterable)} if iterable else {} return instance def __init__(self, iterable: Iterable[_V] | None = None) -> None: self._map: dict[_V, int] def __len__(self) -> int: return len(self._map) def __contains__(self, value: object) -> bool: return value in self._map def __iter__(self) -> Iterator[_V]: yield from self._map # XXX: result of operators are typed AbstractSet[_V] def __eq__(self, other: object) -> bool: if isinstance(other, _AbstractOrderedSet): other = cast("_AbstractOrderedSet[_V]", other) return len(self) == len(other) and tuple(self._map.keys()) == tuple(other._map.keys()) if isinstance(other, Set): other = cast("Set[_V]", other) return super().__eq__(set[_V](other)) return False def __repr__(self) -> str: return f"{self.__class__.__name__}({'{'}{', '.join(repr(e) for e in self._map)}{'}'})" def copy(self: _OST) -> _OST: """Return a shallow copy of the set.""" return copy(self) def issubset(self, other: Iterable[_V]) -> bool: """Return whether every element in the set is in other.""" return self <= OrderedSet(other) def issuperset(self, other: Iterable[_V]) -> bool: """Return whether every element in other is in the set.""" return self >= OrderedSet(other) def union(self: _OST, *others: Iterable[_V]) -> _OST: """Return a new set with elements from the set and all others.""" return cast("_OST", reduce(or_, (self, *others)) if others else copy(self)) # pyright: ignore[reportUnknownArgumentType] def intersection(self: _OST, *others: Iterable[_V]) -> _OST: """Return a new set with elements common to the set and all others.""" return cast("_OST", reduce(and_, (self, *others)) if others else copy(self)) # pyright: ignore[reportUnknownArgumentType] def difference(self: _OST, *others: Iterable[_V]) -> _OST: """Return a new set with elements in the set that are not in the others.""" return cast("_OST", reduce(sub, (self, *others)) if others else copy(self)) # pyright: ignore[reportArgumentType] def symmetric_difference(self: _OST, *others: Iterable[_V]) -> _OST: """Return a new set with elements in either the set or other but not both.""" return cast("_OST", reduce(xor, (self, *others)) if others else copy(self)) # pyright: ignore[reportUnknownArgumentType] # Sequence protocol: # NOTE: uses a linear search through the keys, # which is inefficient (O(n) complexity). def __getitem__(self, index: int) -> _V: index = index if index >= 0 else index + len(self) if index >= 0: for i, elmnt in enumerate(self._map): if i == index: return elmnt msg = "index out of range" raise IndexError(msg) def __reversed__(self) -> Iterator[_V]: # XXX: reversed(ordered_set) is typed reversed[Any] yield from self._map.__reversed__() def index(self, item: _V) -> int: """Return the position of *item* in the set. Args: item: The element to locate. Returns: :obj:`int`: The index of *item* in insertion order. Raises: ValueError: If *item* is not in the set. """ if item not in self._map: raise ValueError for i, elmnt in enumerate(self._map.keys()): if elmnt == item: return i raise ValueError def count(self, item: _V) -> Literal[0, 1]: """Return ``1`` if *item* is in the set, ``0`` otherwise.""" return 1 if item in self._map else 0
[docs] class OrderedFrozenSet(_AbstractOrderedSet[_V], Hashable): """An immutable ordered set that preserves insertion order. :class:`OrderedFrozenSet` supports the full :class:`~collections.abc.Set` protocol plus the :class:`~collections.abc.Sequence` protocol (indexing, ``reversed()``, :meth:`index`, :meth:`count`). Because it is :class:`~collections.abc.Hashable` it can be used as a dictionary key or as a member of another set. For a mutable variant see :class:`OrderedSet`. .. note:: __getitem__ method uses a linear search through the keys, which is inefficient (O(n) complexity) compared to a :class:`frozenset`. """ __slots__: ClassVar[tuple[str, ...]] = () def __hash__(self) -> int: return hash(tuple(self._map.keys()))
[docs] class OrderedSet(_AbstractOrderedSet[_V], MutableSet[_V]): """A mutable ordered set that preserves insertion order. :class:`OrderedSet` extends :class:`~collections.abc.MutableSet` with insertion-order memory and supports the :class:`~collections.abc.Sequence` protocol (indexing, ``reversed()``, :meth:`index`, :meth:`count`). .. note:: The :meth:`pop` method removes and returns the **most recently** added element (LIFO order). .. note:: __getitem__ method uses a linear search through the keys, which is inefficient (O(n) complexity) compared to a :class:`set`. """ __slots__: ClassVar[tuple[str, ...]] = ()
[docs] def add(self, value: _V) -> None: """Add *value* to the set if it is not already present.""" if value not in self._map: self._map[value] = len(self._map) + 1
[docs] def discard(self, value: _V) -> None: """Remove *value* from the set if it is present; otherwise do nothing.""" if value in self._map: self._map.pop(value)
[docs] def pop(self) -> _V: """Return and remove the most recently added element (LIFO order). Raises: KeyError: If the set is empty. """ try: tmp = self[-1] del self._map[tmp] except IndexError: raise KeyError from None else: return tmp
[docs] def clear(self) -> None: """Remove all elements from the set.""" self._map.clear()
[docs] def update(self, *others: Iterable[_V]) -> None: """Update the set, adding the union of all iterables.""" if others: reduce(ior, (self, *others)) # pyright: ignore[reportUnknownArgumentType] return # pragma: no cover
[docs] def intersection_update(self, *others: Iterable[_V]) -> None: """Update the set, keeping only elements found in it and all others.""" if others: # reduce(iand, (self, *others)) tmp: OrderedSet[_V] = OrderedSet[_V](self) for o in others: tmp = cast("OrderedSet[_V]", and_(tmp, OrderedSet(o))) self._map = tmp._map # pyright: ignore[reportUnannotatedClassAttribute] return # pragma: no cover
[docs] def difference_update(self, *others: Iterable[_V]) -> None: """Update the set, removing elements found in others.""" if others: reduce(isub, (self, *others)) # pyright: ignore[reportUnknownArgumentType] return # pragma: no cover
[docs] def symmetric_difference_update(self, *others: Iterable[_V]) -> None: """Update the set, keeping only elements found in either set, but not in both.""" if others: reduce(ixor, (self, *others)) # pyright: ignore[reportUnknownArgumentType] return # pragma: no cover
Set.register(OrderedFrozenSet) # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue] Set.register(OrderedSet) # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue] Sequence.register(OrderedFrozenSet) # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue] Sequence.register(OrderedSet) # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]