Source code for tubular.mapping

"""Contains transformers that apply different types of mappings to columns."""

from __future__ import annotations

from collections import OrderedDict
from typing import Any, Literal

import narwhals as nw
import numpy as np
import pandas as pd
import polars as pl
from beartype import beartype
from typing_extensions import deprecated

from tubular._utils import (
    _convert_dataframe_to_narwhals,
    _return_narwhals_or_native_dataframe,
    block_from_json,
)
from tubular.base import BaseTransformer, register
from tubular.types import DataFrame



[docs]
@register
class BaseMappingTransformer(BaseTransformer):
    """Base Transformer Extension for mapping transformers.

    Attributes
    ----------
    mappings : dict
        Dictionary of mappings for each column individually. The dict passed to mappings in
        init is set to the mappings attribute.

    mappings_from_null: dict[str, Any]
        dict storing what null values will be mapped to. Generally best to use an imputer,
        but this functionality is useful for inverting pipelines.

    return_dtypes: dict[str, RETURN_DTYPES]
        Dictionary of col:dtype for returned columns

    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its supported
        functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    Examples
    --------
    ```pycon
    >>> BaseMappingTransformer(
    ...     mappings={"a": {"Y": 1, "N": 0}},
    ...     return_dtypes={"a": "Int8"},
    ... )
    BaseMappingTransformer(mappings={'a': {'N': 0, 'Y': 1}},
                           return_dtypes={'a': 'Int8'})

    ```

    """

    polars_compatible = True

    lazyframe_compatible = True

    FITS = False

    jsonable = True

    RETURN_DTYPES = Literal[
        "String",
        "Object",
        "Categorical",
        "Boolean",
        "Int8",
        "Int16",
        "Int32",
        "Int64",
        "Float32",
        "Float64",
    ]

    @beartype
    def __init__(
        self,
        mappings: dict[str, dict[Any, Any]],
        return_dtypes: dict[str, RETURN_DTYPES] | None = None,
        **kwargs: bool | None,
    ) -> None:
        """Store the mappings and derive null mappings and return dtypes.

        Parameters
        ----------
        mappings : dict
            Column mappings keyed by column name. Each value is the
            {old value: new value} dict for that column, for example
            {'a': {1: 2, 3: 4}, 'b': {'a': 1, 'b': 2}} maps 1->2, 3->4 in
            column a and 'a'->1, 'b'->2 in column b.

        return_dtypes: Optional[Dict[str, RETURN_DTYPES]]
            Dictionary of col:dtype for returned columns. Columns missing from
            it get a dtype inferred from their mapping values. Note that a
            provided dict is filled in place with the inferred entries.

        **kwargs
            Arbitrary keyword arguments passed onto BaseTransformer.init method.

        Raises
        ------
        ValueError:
            if multiple mappings for null values are provided

        """
        # per column: the value nulls should be mapped to (None when the
        # column's mapping has no null-like key)
        null_targets = {}
        for col, col_mappings in mappings.items():
            null_like_keys = [key for key in col_mappings if pd.isna(key)]

            if len(null_like_keys) > 1:
                multi_null_map_msg = f"Multiple mappings have been provided for null values in column {col}, transformer is set up to handle nan/None/NA as one"
                raise ValueError(
                    multi_null_map_msg,
                )

            null_targets[col] = (
                col_mappings[null_like_keys[0]] if null_like_keys else None
            )

        self.mappings = mappings

        self.mappings_from_null = null_targets

        if return_dtypes is None:
            return_dtypes = {}

        # only infer dtypes for columns the user did not specify
        already_typed = set(return_dtypes.keys())
        for col in set(mappings.keys()).difference(already_typed):
            return_dtypes[col] = self._infer_return_type(mappings, col)

        self.return_dtypes = return_dtypes

        super().__init__(columns=list(mappings.keys()), **kwargs)
        self.is_fitted_ = True  # Does not fit


[docs]
    @block_from_json
    def to_json(self) -> dict[str, dict[str, Any]]:
        """Dump transformer to json dict.

        The dict produced by the parent to_json is adjusted so that its "init"
        level holds the mappings and return_dtypes arguments in place of
        columns.

        Returns
        -------
        dict[str, dict[str, Any]]:
            jsonified transformer. Nested dict containing levels for attributes
            set at init and fit.

        Examples
        --------
        ```pycon
        >>> mapping_transformer = BaseMappingTransformer(mappings={"a": {"x": 1}})

        >>> mapping_transformer.to_json()
        {'tubular_version': ..., 'classname': 'BaseMappingTransformer', 'init': {'copy': False, 'verbose': False, 'return_native': True, 'mappings': {'a': {'x': 1}}, 'return_dtypes': {'a': 'Int64'}}, 'fit': {'is_fitted_': True}}

        ```

        """
        json_dict = super().to_json()

        init_params = json_dict["init"]

        # this class is initialised from mappings, not columns
        init_params.pop("columns")
        init_params.update(
            mappings=self.mappings,
            return_dtypes=self.return_dtypes,
        )

        return json_dict


    @staticmethod
    def _infer_return_type(
        mappings: dict[str, dict[str, str | float | int]],
        col: str,
    ) -> str:
        """Infer the return dtype for a column from its mapping values.

        The values mapped to for `col` are loaded into a polars Series and the
        dtype polars assigns to that Series is returned as a string.

        Parameters
        ----------
        mappings : dict
            Dictionary of mappings for each column.

        col : str
            Column to infer the return dtype for, must be a key of mappings.

        Returns
        -------
            str:
                inferred dtype, e.g. 'Float64'

        Examples
        --------
        ```pycon
        >>> BaseMappingTransformer._infer_return_type({"a": {"Y": 1, "N": 0}}, col="a")
        'Int64'

        ```

        """
        mapped_values = mappings[col].values()
        inferred_dtype = pl.Series(mapped_values).dtype
        return str(inferred_dtype)


[docs]
    def transform(
        self,
        X: DataFrame,
        return_native_override: bool | None = None,
    ) -> DataFrame:
        """Check the transformer is ready to map, then run the parent transform.

        No mapping is applied by this method itself, it checks that the
        mappings, return_dtypes and is_fitted_ attributes are present and
        delegates to the parent class transform.

        Parameters
        ----------
        X : DataFrame
            Data to apply mappings to.

        return_native_override: Optional[bool]
            option to override return_native attr in transformer, useful when calling parent
            methods

        Returns
        -------
        X : DataFrame
            Input X, copied if specified by user.

        Examples
        --------
        ```pycon
        >>> import polars as pl

        >>> transformer = BaseMappingTransformer(
        ...     mappings={"a": {"Y": 1, "N": 0}},
        ...     return_dtypes={"a": "Int8"},
        ... )

        >>> test_df = pl.DataFrame({"a": ["Y", "N"], "b": [3, 4]})

        >>> # base class transform has no effect on data
        >>> transformer.transform(test_df)
        shape: (2, 2)
        ┌─────┬─────┐
        │ a   ┆ b   │
        │ --- ┆ --- │
        │ str ┆ i64 │
        ╞═════╪═════╡
        │ Y   ┆ 3   │
        │ N   ┆ 4   │
        └─────┴─────┘

        ```

        """
        narwhals_frame = _convert_dataframe_to_narwhals(X)

        as_native = self._process_return_native(return_native_override)

        required_attributes = ["mappings", "return_dtypes", "is_fitted_"]
        self.check_is_fitted(required_attributes)

        # keep working in narwhals until the very end, the native conversion
        # is decided by as_native below
        checked_frame = super().transform(
            narwhals_frame,
            return_native_override=False,
        )

        return _return_narwhals_or_native_dataframe(checked_frame, as_native)





[docs]
@register
class BaseMappingTransformMixin(BaseTransformer):
    """Mixin class to apply mappings to columns method.

    Transformer uses the mappings attribute which should be a dict of dicts/mappings
    for each required column.

    Attributes
    ----------
    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its supported
        functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    """

    polars_compatible = True

    lazyframe_compatible = True

    FITS = False

    jsonable = False


[docs]
    @beartype
    def transform(
        self,
        X: DataFrame,
        return_native_override: bool | None = None,
    ) -> DataFrame:
        """Apply mapping defined in the mappings dict to each column in the mappings attribute.

        Values that are keys of a column's mapping are replaced by the mapped
        value, all other values are passed through. Nulls are then filled from
        mappings_from_null (for columns with a non-None entry) and each column
        is cast to its entry in return_dtypes.

        Parameters
        ----------
        X : DataFrame
            Data with nominal columns to transform.

        return_native_override: Optional[bool]
            option to override return_native attr in transformer, useful when calling parent
            methods

        Returns
        -------
        X : DataFrame
            Transformed input X with levels mapped according to mappings dict.

        #  not currently including doctest for this, as is not intended to be used
        #  independently (should be inherited as a mixin)

        """
        self.check_is_fitted(
            ["mappings", "return_dtypes", "mappings_from_null", "is_fitted_"]
        )

        X = _convert_dataframe_to_narwhals(X)

        backend = nw.get_native_namespace(X).__name__

        return_native = self._process_return_native(return_native_override)

        X = super().transform(X, return_native_override=False)

        # rows whose current value is a key of the column's mapping
        mappable_conditions = {
            col: nw.col(col).is_in(self.mappings[col]) for col in self.mappings
        }

        # if the column is categorical, narwhals struggles to infer a type
        # during the when/then logic, so we need to tell polars to use string
        # as a common type.
        # types are then corrected before returning at the end
        schema = X.collect_schema()

        # membership is tested against a tuple (equality based) rather than a
        # set (hash based): a parametrised dtype instance such as Enum may not
        # hash the same as its class, in which case a set lookup would miss it
        categorical_like_dtypes = (nw.Categorical, nw.Enum)
        mapping_exprs = {
            col: nw.col(col).cast(nw.String)
            if schema[col] in categorical_like_dtypes
            else nw.col(col)
            for col in self.mappings
        }

        mapping_exprs = {
            col: nw.when(mappable_conditions[col])
            .then(
                # default here allows replace_strict to work, but the nulls are replaced
                # in the otherwise section anyway
                mapping_exprs[col].replace_strict(self.mappings[col], default=None)
            )
            .otherwise(mapping_exprs[col])
            for col in self.mappings
        }

        # finally, handle mappings from null (imputations)
        mapping_exprs = {
            col: (mapping_exprs[col].fill_null(self.mappings_from_null[col]))
            if self.mappings_from_null[col] is not None
            else mapping_exprs[col]
            for col in mapping_exprs
        }

        # cast to the requested return dtype, skipping only Boolean columns on
        # the pandas backend (these get special handling at the end)
        mapping_exprs = {
            col: mapping_exprs[col].cast(getattr(nw, self.return_dtypes[col]))
            # pandas bool types need special handling
            if not (self.return_dtypes[col] == "Boolean" and backend == "pandas")
            else mapping_exprs[col]
            for col in mapping_exprs
        }

        X = (
            X.with_columns(
                **mapping_exprs,
            )
            if mapping_exprs
            else X
        )

        # this last section is needed to ensure pandas bool columns
        # are returned in sensible (non object) types
        # maybe_convert_dtypes will not run on an expression,
        # so do need a second with_columns call
        if "Boolean" in self.return_dtypes.values() and backend == "pandas":
            X = X.with_columns(
                nw.maybe_convert_dtypes(X[col]).cast(
                    getattr(nw, self.return_dtypes[col]),
                )
                if self.return_dtypes[col] == "Boolean"
                else nw.col(col)
                for col in self.mappings
            )

        return _return_narwhals_or_native_dataframe(X, return_native)





[docs]
@register
class MappingTransformer(BaseMappingTransformer, BaseMappingTransformMixin):
    """Transformer to map values in columns to other values e.g. to merge two levels into one.

    Note, the MappingTransformer does not require 'self-mappings' to be defined i.e. if you want
    to map a value to itself, you can omit this value from the mappings rather than having to
    map it to itself.

    This transformer inherits from BaseMappingTransformMixin as well as the BaseMappingTransformer,
    BaseMappingTransformer performs standard checks, while BaseMappingTransformMixin handles the
    actual logic.

    Parameters
    ----------
    mappings : dict
        Dictionary containing column mappings. Each value in mappings should be a dictionary
        of key (column to apply mapping to) value (mapping dict for given columns) pairs. For
        example the following dict {'a': {1: 2, 3: 4}, 'b': {'a': 1, 'b': 2}} would specify
        a mapping for column a of 1->2, 3->4 and a mapping for column b of 'a'->1, b->2.

    return_dtypes: Optional[Dict[str, RETURN_DTYPES]]
        Dictionary of col:dtype for returned columns

    **kwargs
        Arbitrary keyword arguments passed onto BaseMappingTransformer.init method.

    Attributes
    ----------
    mappings : dict
        Dictionary of mappings for each column individually. The dict passed to mappings in
        init is set to the mappings attribute.

    mappings_from_null: dict[str, Any]
        dict storing what null values will be mapped to. Generally best to use an imputer,
        but this functionality is useful for inverting pipelines.

    return_dtypes: dict[str, RETURN_DTYPES]
        Dictionary of col:dtype for returned columns

    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its supported
        functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    Examples
    --------
    ```pycon
    >>> transformer = MappingTransformer(
    ...     mappings={"a": {"Y": 1, "N": 0}},
    ...     return_dtypes={"a": "Int8"},
    ... )
    >>> transformer
    MappingTransformer(mappings={'a': {'N': 0, 'Y': 1}},
                       return_dtypes={'a': 'Int8'})

    >>> # transformer can also be dumped to json and reinitialised
    >>> json_dump = transformer.to_json()
    >>> json_dump
    {'tubular_version': ..., 'classname': 'MappingTransformer', 'init': {'copy': False, 'verbose': False, 'return_native': True, 'mappings': {'a': {'Y': 1, 'N': 0}}, 'return_dtypes': {'a': 'Int8'}}, 'fit': {'is_fitted_': True}}

    >>> MappingTransformer.from_json(json_dump)
    MappingTransformer(mappings={'a': {'N': 0, 'Y': 1}},
                       return_dtypes={'a': 'Int8'})

    ```

    """

    polars_compatible = True

    lazyframe_compatible = True

    FITS = False

    jsonable = True


[docs]
    @beartype
    def transform(
        self,
        X: DataFrame,
    ) -> DataFrame:
        """Map the values of X according to the mappings attribute dict.

        The mapping itself is done by BaseMappingTransformMixin.transform. Note, this
        transform method is different to some of the transform methods in the nominal
        module, even though they also use the BaseMappingTransformMixin.transform method.
        Here, if a value does not exist in the mapping it is unchanged.

        Parameters
        ----------
        X : DataFrame
            Data with nominal columns to transform.

        Returns
        -------
        X : DataFrame
            Transformed input X with levels mapped according to mappings dict.

        Examples
        --------
        ```pycon
        >>> import polars as pl

        >>> transformer = MappingTransformer(
        ...     mappings={"a": {"Y": 1, "N": 0}},
        ...     return_dtypes={"a": "Int8"},
        ... )

        >>> test_df = pl.DataFrame({"a": ["Y", "N"], "b": [3, 4]})

        >>> transformer.transform(test_df)
        shape: (2, 2)
        ┌─────┬─────┐
        │ a   ┆ b   │
        │ --- ┆ --- │
        │ i8  ┆ i64 │
        ╞═════╪═════╡
        │ 1   ┆ 3   │
        │ 0   ┆ 4   │
        └─────┴─────┘

        ```

        """
        self.check_is_fitted("is_fitted_")
        narwhals_frame = _convert_dataframe_to_narwhals(X)

        # both parent calls are asked for narwhals output, conversion back to
        # native (if wanted) happens once at the end
        checked_frame = BaseTransformer.transform(
            self,
            narwhals_frame,
            return_native_override=False,
        )

        mapped_frame = BaseMappingTransformMixin.transform(
            self,
            checked_frame,
            return_native_override=False,
        )

        return _return_narwhals_or_native_dataframe(mapped_frame, self.return_native)




# DEPRECATED TRANSFORMERS

[docs]
@deprecated(
    """This transformer has not been selected for conversion to polars/narwhals,
    and so has been deprecated. If it is useful to you, please raise an issue
    for it to be modernised
    """,
)
class BaseCrossColumnMappingTransformer(BaseMappingTransformer):
    """BaseMappingTransformer Extension for cross column mapping transformers.

    Attributes
    ----------
    adjust_column : str
        Column containing the values to be adjusted.

    mappings : dict
        Dictionary of mappings for each column individually to be applied to the adjust_column.
        The dict passed to mappings in init is set to the mappings attribute.

    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its supported
        functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    deprecated: bool
        indicates if class has been deprecated

    """

    polars_compatible = False

    lazyframe_compatible = False

    FITS = False

    jsonable = False

    deprecated = True

    def __init__(
        self,
        adjust_column: str,
        mappings: dict[str, dict],
        **kwargs: dict[str, bool],
    ) -> None:
        """Initialise the parent with mappings and store the column to adjust.

        Parameters
        ----------
        adjust_column : str
            The column to be adjusted.

        mappings : dict or OrderedDict
            Dictionary containing adjustments. Exact structure will vary by child class.

        **kwargs
            Arbitrary keyword arguments passed onto BaseTransformer.init method.

        Raises
        ------
        TypeError:
            if adjust_column is not string type.

        """
        # parent init runs first, so any error it raises takes precedence
        super().__init__(mappings=mappings, **kwargs)

        if isinstance(adjust_column, str):
            self.adjust_column = adjust_column
        else:
            msg = f"{self.classname()}: adjust_column should be a string"
            raise TypeError(msg)


[docs]
    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Run the parent transform and check adjust_column is present in X.

        Parameters
        ----------
        X : pd.DataFrame
            Data to apply adjustments to.

        Returns
        -------
        X : pd.DataFrame
            Output of the parent transform, no adjustment is applied here.

        Raises
        ------
        ValueError:
            if provided adjust_column is not in DataFrame.

        """
        X = super().transform(X)

        available_columns = X.columns.to_numpy()
        if self.adjust_column in available_columns:
            return X

        msg = f"{self.classname()}: variable {self.adjust_column} is not in X"
        raise ValueError(msg)





[docs]
@deprecated(
    """This transformer has not been selected for conversion to polars/narwhals,
    and so has been deprecated. If it is useful to you, please raise an issue
    for it to be modernised
    """,
)
class CrossColumnMappingTransformer(BaseCrossColumnMappingTransformer):
    """Transformer to adjust values in one column based on the values of another column.

    Attributes
    ----------
    adjust_column : str
        Column containing the values to be adjusted.

    mappings : dict
        Dictionary of mappings for each column individually to be applied to the adjust_column.
        The dict passed to mappings in init is set to the mappings attribute.

    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its supported
        functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    deprecated: bool
        indicates if class has been deprecated

    """

    polars_compatible = False

    lazyframe_compatible = False

    jsonable = False

    FITS = False

    deprecated = True

    def __init__(
        self,
        adjust_column: str,
        mappings: dict[str, dict],
        **kwargs: dict[str, bool],
    ) -> None:
        """Initialise the parent, then check multi column mappings are ordered.

        Parameters
        ----------
        adjust_column : str
            The column to be adjusted.

        mappings : dict or OrderedDict
            Dictionary containing adjustments, keyed by the column the adjustment is
            based on, with the adjustment dict for that column as value. For example
            {'a': {1: 'a', 3: 'b'}, 'b': {'a': 1, 'b': 2}} would replace the values in
            the adjustment column based off the values in column a using the mapping
            1->'a', 3->'b' and also replace based off the values in column b using a
            mapping 'a'->1, 'b'->2.
            If more than one column is defined for this mapping, then this object must
            be an OrderedDict to ensure reproducibility.

        **kwargs
            Arbitrary keyword arguments passed onto BaseTransformer.init method.

        Raises
        ------
        TypeError:
            if mappings contains more than one column and is not an OrderedDict.

        """
        super().__init__(adjust_column=adjust_column, mappings=mappings, **kwargs)

        single_column = len(mappings) <= 1
        is_ordered = isinstance(mappings, OrderedDict)

        if not (single_column or is_ordered):
            msg = f"{self.classname()}: mappings should be an ordered dict for 'replace' mappings using multiple columns"
            raise TypeError(msg)


[docs]
    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Replace values in adjust_column based on the values of the mapped columns.

        For every mapped column and every level in its mapping, rows where the
        column equals the level have adjust_column set to the mapped value.
        Replacements are applied one after another, so later ones overwrite
        earlier ones on rows matched by both.

        Parameters
        ----------
        X : pd.DataFrame
            Data to apply adjustments to.

        Returns
        -------
        X : pd.DataFrame
            Transformed data X with adjustments applied to specified columns.

        """
        X = super().transform(X)

        for source_col in self.columns:
            for level, replacement in self.mappings[source_col].items():
                level_mask = X[source_col] == level
                X[self.adjust_column] = np.where(
                    level_mask,
                    replacement,
                    X[self.adjust_column],
                )

        return X





[docs]
@deprecated(
    """This transformer has not been selected for conversion to polars/narwhals,
    and so has been deprecated. If it is useful to you, please raise an issue
    for it to be modernised
    """,
)
class BaseCrossColumnNumericTransformer(BaseCrossColumnMappingTransformer):
    """BaseCrossColumnNumericTransformer Extension for cross column numerical mapping transformers.

    Attributes
    ----------
    adjust_column : str
        Column containing the values to be adjusted.

    mappings : dict
        Dictionary of mappings for each column individually to be applied to the adjust_column.
        The dict passed to mappings in init is set to the mappings attribute.

    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its supported
        functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    deprecated: bool
        indicates if class has been deprecated

    """

    polars_compatible = False

    lazyframe_compatible = False

    FITS = False

    jsonable = False

    deprecated = True

    def __init__(
        self,
        adjust_column: str,
        mappings: dict[str, dict],
        **kwargs: dict[str, bool],
    ) -> None:
        """Initialise the parent, then check every mapping value is numeric.

        Parameters
        ----------
        adjust_column : str
            The column to be adjusted.

        mappings : dict
            Dictionary containing adjustments. Exact structure will vary by child class.
            Every value of every inner dict must be numeric: int, float (or a
            subclass of these) or a numpy integer/floating scalar. bool values
            are not accepted.

        **kwargs
            Arbitrary keyword arguments passed onto BaseTransformer.init method.

        Raises
        ------
        TypeError:
            if any mapping value is non-numeric.

        """
        super().__init__(mappings=mappings, adjust_column=adjust_column, **kwargs)

        # isinstance (rather than an exact type() match) so that numeric
        # subclasses and numpy scalars such as np.float64 are accepted
        numeric_types = (int, float, np.integer, np.floating)

        for col_mappings in mappings.values():
            for value in col_mappings.values():
                # bool is a subclass of int, but has never been accepted as a
                # numeric adjustment, so keep rejecting it explicitly
                if isinstance(value, bool) or not isinstance(value, numeric_types):
                    msg = f"{self.classname()}: mapping values must be numeric"
                    raise TypeError(msg)


[docs]
    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Run the parent transform and check adjust_column has a numeric dtype.

        Parameters
        ----------
        X : pd.DataFrame
            Data to apply adjustments to.

        Returns
        -------
        X : pd.DataFrame
            Output of the parent transform, no adjustment is applied here.

        Raises
        ------
        TypeError:
            if adjust_column in X does not have a numeric dtype

        """
        X = super().transform(X)

        adjust_values = X[self.adjust_column]
        if pd.api.types.is_numeric_dtype(adjust_values):
            return X

        msg = f"{self.classname()}: variable {self.adjust_column} must have numeric dtype."
        raise TypeError(msg)





[docs]
@deprecated(
    """This transformer has not been selected for conversion to polars/narwhals,
    and so has been deprecated. If it is useful to you, please raise an issue
    for it to be modernised
    """,
)
class CrossColumnMultiplyTransformer(BaseCrossColumnNumericTransformer):
    """Transformer to apply a multiplicative adjustment to values in one column based on the values of another column.

    Attributes
    ----------
    adjust_column : str
        Column containing the values to be adjusted.

    mappings : dict
        Dictionary of multiplicative adjustments for each column individually to be applied to the adjust_column.
        The dict passed to mappings in init is set to the mappings attribute.

    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its supported
        functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    deprecated: bool
        indicates if class has been deprecated

    """

    polars_compatible = False

    lazyframe_compatible = False

    FITS = False

    jsonable = False

    deprecated = True

    def __init__(
        self,
        adjust_column: str,
        mappings: dict[str, dict],
        **kwargs: dict[str, bool],
    ) -> None:
        """Initialise class instance, all setup is done by the parent init.

        Parameters
        ----------
        adjust_column : str
            The column to be adjusted.  The data type of this column must be int or float.

        mappings : dict
            Dictionary containing adjustments, keyed by the column the adjustment is
            based on, with the adjustment dict for that column as value. For example
            {'a': {1: 2, 3: 5}, 'b': {'a': 0.5, 'b': 1.1}} would multiply the values in
            the adjustment column based off the values in column a using the mapping
            1->2*value, 3->5*value and also multiply based off the values in column b
            using a mapping 'a'->0.5*value, 'b'->1.1*value.
            The values within the dicts defining the multipliers must have type int or float.

        **kwargs
            Arbitrary keyword arguments passed onto BaseTransformer.init method.

        """
        super().__init__(adjust_column=adjust_column, mappings=mappings, **kwargs)


[docs]
    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Multiply adjust_column by the multipliers in the mappings attribute.

        For every mapped column and every level in its mapping, rows where the
        column equals the level have adjust_column multiplied by the mapped
        value. Multipliers are applied one after another, so a row matched
        more than once is multiplied more than once.

        Parameters
        ----------
        X : pd.DataFrame
            Data to apply adjustments to.

        Returns
        -------
        X : pd.DataFrame
            Transformed data X with adjustments applied to specified columns.

        """
        X = super().transform(X)

        for source_col in self.columns:
            for level, multiplier in self.mappings[source_col].items():
                level_mask = X[source_col] == level
                X[self.adjust_column] = np.where(
                    level_mask,
                    X[self.adjust_column] * multiplier,
                    X[self.adjust_column],
                )

        return X





[docs]
@deprecated(
    """This transformer has not been selected for conversion to polars/narwhals,
    and so has been deprecated. If it is useful to you, please raise an issue
    for it to be modernised
    """,
)
class CrossColumnAddTransformer(BaseCrossColumnNumericTransformer):
    """Transformer to apply an additive adjustment to values in one column based on the values of another column.

    Attributes
    ----------
    adjust_column : str
        Column containing the values to be adjusted.

    mappings : dict
        Dictionary of additive adjustments for each column individually to be applied to the adjust_column.
        The dict passed to mappings in init is set to the mappings attribute.

    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its supported
        functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    deprecated: bool
        indicates if class has been deprecated

    """

    polars_compatible = False

    lazyframe_compatible = False

    FITS = False

    jsonable = False

    deprecated = True

    def __init__(
        self,
        adjust_column: str,
        mappings: dict[str, dict],
        **kwargs: dict[str, bool],
    ) -> None:
        """Initialise class instance, all setup is done by the parent init.

        Parameters
        ----------
        adjust_column : str
            The column to be adjusted.  The data type of this column must be int or float.

        mappings : dict
            Dictionary containing adjustments, keyed by the column the adjustment is
            based on, with the adjustment dict for that column as value. For example
            {'a': {1: 2, 3: 5}, 'b': {'a': 1, 'b': -5}} would provide an additive
            adjustment to the values in the adjustment column based off the values in
            column a using the mapping 1->2+value, 3->5+value and also an additive
            adjustment based off the values in column b using a mapping
            'a'->1+value, 'b'->(-5)+value.
            The values within the dicts defining the values to be added must have type int or float.

        **kwargs
            Arbitrary keyword arguments passed onto BaseTransformer.init method.

        """
        super().__init__(adjust_column=adjust_column, mappings=mappings, **kwargs)


[docs]
    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Add the adjustments in the mappings attribute to adjust_column.

        For every mapped column and every level in its mapping, rows where the
        column equals the level have the mapped value added to adjust_column.
        Adjustments are applied one after another, so a row matched more than
        once is adjusted more than once.

        Parameters
        ----------
        X : pd.DataFrame
            Data to apply adjustments to.

        Returns
        -------
        X : pd.DataFrame
            Transformed data X with adjustments applied to specified columns.

        """
        X = super().transform(X)

        for source_col in self.columns:
            for level, addend in self.mappings[source_col].items():
                level_mask = X[source_col] == level
                X[self.adjust_column] = np.where(
                    level_mask,
                    X[self.adjust_column] + addend,
                    X[self.adjust_column],
                )

        return X