Source code for tubular.mapping
"""Contains transformers that apply different types of mappings to columns."""
from __future__ import annotations
from collections import OrderedDict
from typing import Any, Literal
import narwhals as nw
import numpy as np
import pandas as pd
import polars as pl
from beartype import beartype
from typing_extensions import deprecated
from tubular._utils import (
_convert_dataframe_to_narwhals,
_return_narwhals_or_native_dataframe,
block_from_json,
)
from tubular.base import BaseTransformer, register
from tubular.types import DataFrame
[docs]
@register
class BaseMappingTransformer(BaseTransformer):
    """Base class for transformers that carry per-column value mappings.

    This class validates and stores the mappings (and the dtypes the mapped
    columns should be returned as). Its own ``transform`` does not apply the
    mappings to the data.

    Attributes
    ----------
    mappings : dict
        Dictionary of mappings for each column individually. This is the dict
        passed to ``mappings`` at init.

    mappings_from_null: dict[str, Any]
        For each column, the value that nulls are mapped to (``None`` when the
        column's mapping has no null key). Generally best to use an imputer,
        but this functionality is useful for inverting pipelines.

    return_dtypes: dict[str, RETURN_DTYPES]
        Dictionary of col:dtype for returned columns.

    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its
        supported functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to
        polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    Examples
    --------
    ```pycon
    >>> BaseMappingTransformer(
    ...     mappings={"a": {"Y": 1, "N": 0}},
    ...     return_dtypes={"a": "Int8"},
    ... )
    BaseMappingTransformer(mappings={'a': {'N': 0, 'Y': 1}},
                           return_dtypes={'a': 'Int8'})

    ```

    """

    polars_compatible = True

    lazyframe_compatible = True

    FITS = False

    jsonable = True

    # dtype names accepted in return_dtypes
    RETURN_DTYPES = Literal[
        "String",
        "Object",
        "Categorical",
        "Boolean",
        "Int8",
        "Int16",
        "Int32",
        "Int64",
        "Float32",
        "Float64",
    ]

    @beartype
    def __init__(
        self,
        mappings: dict[str, dict[Any, Any]],
        return_dtypes: dict[str, RETURN_DTYPES] | None = None,
        **kwargs: bool | None,
    ) -> None:
        """Store the mappings and work out null handling and return dtypes.

        Parameters
        ----------
        mappings : dict
            Dictionary keyed by column name, where each value is the mapping
            dict for that column. For example
            {'a': {1: 2, 3: 4}, 'b': {'a': 1, 'b': 2}} specifies a mapping for
            column a of 1->2, 3->4 and a mapping for column b of 'a'->1, b->2.

        return_dtypes: Optional[Dict[str, RETURN_DTYPES]]
            Dictionary of col:dtype for returned columns. Columns missing from
            it have their dtype inferred from the mapping values. If a dict is
            passed, the inferred entries are added to that same dict.

        **kwargs
            Arbitrary keyword arguments passed onto BaseTransformer.init method.

        Raises
        ------
        ValueError:
            if multiple mappings for null values are provided

        """
        # per column: the value nulls map to, or None when no null key exists
        null_targets = {}
        for col, col_mappings in mappings.items():
            null_like_keys = [level for level in col_mappings if pd.isna(level)]

            if len(null_like_keys) > 1:
                multi_null_map_msg = f"Multiple mappings have been provided for null values in column {col}, transformer is set up to handle nan/None/NA as one"
                raise ValueError(multi_null_map_msg)

            null_targets[col] = (
                col_mappings[null_like_keys[0]] if null_like_keys else None
            )

        self.mappings = mappings
        self.mappings_from_null = null_targets

        if return_dtypes is None:
            return_dtypes = {}

        # any column without an explicit dtype gets one inferred from its mapping
        explicitly_typed = set(return_dtypes.keys())
        for col in set(mappings.keys()).difference(explicitly_typed):
            return_dtypes[col] = self._infer_return_type(mappings, col)

        self.return_dtypes = return_dtypes

        super().__init__(columns=list(mappings.keys()), **kwargs)

        # nothing is learned from data, so the transformer counts as fitted
        self.is_fitted_ = True

    @block_from_json
    def to_json(self) -> dict[str, dict[str, Any]]:
        """Dump transformer to json dict.

        Returns
        -------
        dict[str, dict[str, Any]]:
            jsonified transformer. Nested dict containing levels for attributes
            set at init and fit.

        Examples
        --------
        ```pycon
        >>> mapping_transformer = BaseMappingTransformer(mappings={"a": {"x": 1}})
        >>> mapping_transformer.to_json()
        {'tubular_version': ..., 'classname': 'BaseMappingTransformer', 'init': {'copy': False, 'verbose': False, 'return_native': True, 'mappings': {'a': {'x': 1}}, 'return_dtypes': {'a': 'Int64'}}, 'fit': {'is_fitted_': True}}

        ```

        """
        json_dict = super().to_json()

        # this class is initialised from mappings rather than columns
        init_section = json_dict["init"]
        init_section.pop("columns")
        init_section.update(
            mappings=self.mappings,
            return_dtypes=self.return_dtypes,
        )

        return json_dict

    @staticmethod
    def _infer_return_type(
        mappings: dict[str, dict[str, str | float | int]],
        col: str,
    ) -> str:
        """Infer the return dtype of a column from the values it is mapped to.

        The mapped values are loaded into a polars Series and the name of the
        dtype polars assigns is returned.

        Returns
        -------
        str:
            inferred dtype, e.g. 'Float64'

        Examples
        --------
        ```pycon
        >>> BaseMappingTransformer._infer_return_type({"a": {"Y": 1, "N": 0}}, col="a")
        'Int64'

        ```

        """
        mapped_values = mappings[col].values()
        return str(pl.Series(mapped_values).dtype)

    def transform(
        self,
        X: DataFrame,
        return_native_override: bool | None = None,
    ) -> DataFrame:
        """Check the transformer is fitted and run the parent transform.

        Parameters
        ----------
        X : DataFrame
            Data to apply mappings to.

        return_native_override: Optional[bool]
            option to override return_native attr in transformer, useful when calling parent
            methods

        Returns
        -------
        X : DataFrame
            Input X, copied if specified by user.

        Examples
        --------
        ```pycon
        >>> import polars as pl
        >>> transformer = BaseMappingTransformer(
        ...     mappings={"a": {"Y": 1, "N": 0}},
        ...     return_dtypes={"a": "Int8"},
        ... )
        >>> test_df = pl.DataFrame({"a": ["Y", "N"], "b": [3, 4]})
        >>> # base class transform has no effect on data
        >>> transformer.transform(test_df)
        shape: (2, 2)
        ┌─────┬─────┐
        │ a   ┆ b   │
        │ --- ┆ --- │
        │ str ┆ i64 │
        ╞═════╪═════╡
        │ Y   ┆ 3   │
        │ N   ┆ 4   │
        └─────┴─────┘

        ```

        """
        frame = _convert_dataframe_to_narwhals(X)
        as_native = self._process_return_native(return_native_override)

        self.check_is_fitted(["mappings", "return_dtypes", "is_fitted_"])

        # keep the frame in narwhals form until the final conversion below
        frame = super().transform(frame, return_native_override=False)

        return _return_narwhals_or_native_dataframe(frame, as_native)
[docs]
@register
class BaseMappingTransformMixin(BaseTransformer):
    """Mixin providing a transform method that applies mappings to columns.

    The transform reads the ``mappings``, ``mappings_from_null`` and
    ``return_dtypes`` attributes, so the inheriting class must set them.
    ``mappings`` should be a dict of dicts/mappings, one per required column.

    Attributes
    ----------
    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its
        supported functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to
        polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    """

    polars_compatible = True

    lazyframe_compatible = True

    FITS = False

    jsonable = False

    @beartype
    def transform(
        self,
        X: DataFrame,
        return_native_override: bool | None = None,
    ) -> DataFrame:
        """Apply the mapping stored for each column in the mappings attribute.

        Values with an entry in a column's mapping are replaced, other values
        are kept. Nulls are then filled where a null mapping exists, and each
        column is cast to its entry in ``return_dtypes``.

        Parameters
        ----------
        X : DataFrame
            Data with nominal columns to transform.

        return_native_override: Optional[bool]
            option to override return_native attr in transformer, useful when calling parent
            methods

        Returns
        -------
        X : DataFrame
            Transformed input X with levels mapped according to mappings dict.

        Notes
        -----
        No doctest is included for this method, as it is not intended to be
        used independently (it should be inherited as a mixin).

        """
        self.check_is_fitted(
            ["mappings", "return_dtypes", "mappings_from_null", "is_fitted_"]
        )

        X = _convert_dataframe_to_narwhals(X)
        backend = nw.get_native_namespace(X).__name__
        return_native = self._process_return_native(return_native_override)

        X = super().transform(X, return_native_override=False)

        schema = X.collect_schema()

        column_exprs = {}
        for col, col_mapping in self.mappings.items():
            # if the column is categorical, narwhals struggles to infer a type
            # during the when/then logic, so string is used as a common type;
            # the final dtype is set by the cast further down
            source_expr = nw.col(col)
            if schema[col] in {nw.Categorical, nw.Enum}:
                source_expr = source_expr.cast(nw.String)

            # default=None allows replace_strict to work; values outside the
            # mapping are taken from the otherwise branch anyway
            expr = (
                nw.when(nw.col(col).is_in(col_mapping))
                .then(source_expr.replace_strict(col_mapping, default=None))
                .otherwise(source_expr)
            )

            # mappings from null (imputations)
            null_target = self.mappings_from_null[col]
            if null_target is not None:
                expr = expr.fill_null(null_target)

            # pandas bool columns are cast in the dedicated step below
            target_dtype = self.return_dtypes[col]
            if target_dtype != "Boolean" or backend != "pandas":
                expr = expr.cast(getattr(nw, target_dtype))

            column_exprs[col] = expr

        if column_exprs:
            X = X.with_columns(**column_exprs)

        # this last section is needed to ensure pandas bool columns
        # are returned in sensible (non object) types.
        # maybe_convert_dtypes will not run on an expression,
        # so a second with_columns call is needed
        if backend == "pandas" and "Boolean" in self.return_dtypes.values():
            bool_corrected = [
                nw.maybe_convert_dtypes(X[col]).cast(
                    getattr(nw, self.return_dtypes[col]),
                )
                if self.return_dtypes[col] == "Boolean"
                else nw.col(col)
                for col in self.mappings
            ]
            X = X.with_columns(bool_corrected)

        return _return_narwhals_or_native_dataframe(X, return_native)
[docs]
@register
class MappingTransformer(BaseMappingTransformer, BaseMappingTransformMixin):
    """Transformer to map values in columns to other values e.g. to merge two levels into one.

    Note, the MappingTransformer does not require 'self-mappings' to be defined i.e. if you want
    to map a value to itself, you can omit this value from the mappings rather than having to
    map it to itself.

    This transformer inherits from BaseMappingTransformMixin as well as the BaseMappingTransformer,
    BaseMappingTransformer performs standard checks, while BaseMappingTransformMixin handles the
    actual logic.

    Parameters
    ----------
    mappings : dict
        Dictionary keyed by column name, where each value is the mapping dict
        for that column. For example {'a': {1: 2, 3: 4}, 'b': {'a': 1, 'b': 2}}
        specifies a mapping for column a of 1->2, 3->4 and a mapping for column
        b of 'a'->1, b->2.

    return_dtypes: Optional[Dict[str, RETURN_DTYPES]]
        Dictionary of col:dtype for returned columns

    **kwargs
        Arbitrary keyword arguments passed onto BaseMappingTransformer.init method.

    Attributes
    ----------
    mappings : dict
        Dictionary of mappings for each column individually. The dict passed to mappings in
        init is set to the mappings attribute.

    mappings_from_null: dict[str, Any]
        dict storing what null values will be mapped to. Generally best to use an imputer,
        but this functionality is useful for inverting pipelines.

    return_dtypes: dict[str, RETURN_DTYPES]
        Dictionary of col:dtype for returned columns

    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its
        supported functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to
        polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    Examples
    --------
    ```pycon
    >>> transformer = MappingTransformer(
    ...     mappings={"a": {"Y": 1, "N": 0}},
    ...     return_dtypes={"a": "Int8"},
    ... )
    >>> transformer
    MappingTransformer(mappings={'a': {'N': 0, 'Y': 1}},
                       return_dtypes={'a': 'Int8'})

    >>> # transformer can also be dumped to json and reinitialised
    >>> json_dump = transformer.to_json()
    >>> json_dump
    {'tubular_version': ..., 'classname': 'MappingTransformer', 'init': {'copy': False, 'verbose': False, 'return_native': True, 'mappings': {'a': {'Y': 1, 'N': 0}}, 'return_dtypes': {'a': 'Int8'}}, 'fit': {'is_fitted_': True}}

    >>> MappingTransformer.from_json(json_dump)
    MappingTransformer(mappings={'a': {'N': 0, 'Y': 1}},
                       return_dtypes={'a': 'Int8'})

    ```

    """

    polars_compatible = True

    lazyframe_compatible = True

    FITS = False

    jsonable = True

    @beartype
    def transform(
        self,
        X: DataFrame,
    ) -> DataFrame:
        """Transform the input data X according to the mappings in the mappings attribute dict.

        This method calls the BaseMappingTransformMixin.transform. Note, this transform method is
        different to some of the transform methods in the nominal module, even though they also
        use the BaseMappingTransformMixin.transform method. Here, if a value does not exist in
        the mapping it is unchanged.

        Parameters
        ----------
        X : DataFrame
            Data with nominal columns to transform.

        Returns
        -------
        X : DataFrame
            Transformed input X with levels mapped according to mappings dict.

        Examples
        --------
        ```pycon
        >>> import polars as pl
        >>> transformer = MappingTransformer(
        ...     mappings={'a': {'Y': 1, 'N': 0}},
        ...     return_dtypes={"a":"Int8"},
        ... )
        >>> test_df=pl.DataFrame({'a': ["Y", "N"], 'b': [3,4]})
        >>> transformer.transform(test_df)
        shape: (2, 2)
        ┌─────┬─────┐
        │ a   ┆ b   │
        │ --- ┆ --- │
        │ i8  ┆ i64 │
        ╞═════╪═════╡
        │ 1   ┆ 3   │
        │ 0   ┆ 4   │
        └─────┴─────┘

        ```

        """
        self.check_is_fitted("is_fitted_")

        frame = _convert_dataframe_to_narwhals(X)

        # both parent calls stay in narwhals form; the native conversion
        # happens once, on return
        frame = BaseTransformer.transform(self, frame, return_native_override=False)
        mapped = BaseMappingTransformMixin.transform(
            self,
            frame,
            return_native_override=False,
        )

        return _return_narwhals_or_native_dataframe(mapped, self.return_native)
# DEPRECATED TRANSFORMERS
[docs]
@deprecated(
    """This transformer has not been selected for conversion to polars/narwhals,
and so has been deprecated. If it is useful to you, please raise an issue
for it to be modernised
""",
)
class BaseCrossColumnMappingTransformer(BaseMappingTransformer):
    """Base class for transformers that adjust one column using mappings keyed on other columns.

    Attributes
    ----------
    adjust_column : str
        Column containing the values to be adjusted.

    mappings : dict
        Dictionary of mappings for each column individually to be applied to the adjust_column.
        The dict passed to mappings in init is set to the mappings attribute.

    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its
        supported functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to
        polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    deprecated: bool
        indicates if class has been deprecated

    """

    polars_compatible = False

    lazyframe_compatible = False

    FITS = False

    jsonable = False

    deprecated = True

    def __init__(
        self,
        adjust_column: str,
        mappings: dict[str, dict],
        **kwargs: dict[str, bool],
    ) -> None:
        """Initialise the parent with the mappings and record the column to adjust.

        Parameters
        ----------
        adjust_column : str
            The column to be adjusted.

        mappings : dict or OrderedDict
            Dictionary containing adjustments. Exact structure will vary by child class.

        **kwargs
            Arbitrary keyword arguments passed onto BaseTransformer.init method.

        Raises
        ------
        TypeError:
            if adjust_column is not string type.

        """
        super().__init__(mappings=mappings, **kwargs)

        # checked after the parent init, as in the original flow
        if isinstance(adjust_column, str):
            self.adjust_column = adjust_column
            return

        msg = f"{self.classname()}: adjust_column should be a string"
        raise TypeError(msg)

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Run the parent transform and check the adjust column is present.

        Parameters
        ----------
        X : pd.DataFrame
            Data to apply adjustments to.

        Returns
        -------
        X : pd.DataFrame
            Output of the parent transform; no adjustment is applied here.

        Raises
        ------
        ValueError:
            if provided adjust_column is not in DataFrame.

        """
        X = super().transform(X)

        column_names = X.columns.to_numpy()
        if self.adjust_column in column_names:
            return X

        msg = f"{self.classname()}: variable {self.adjust_column} is not in X"
        raise ValueError(msg)
[docs]
@deprecated(
    """This transformer has not been selected for conversion to polars/narwhals,
and so has been deprecated. If it is useful to you, please raise an issue
for it to be modernised
""",
)
class CrossColumnMappingTransformer(BaseCrossColumnMappingTransformer):
    """Transformer to adjust values in one column based on the values of another column.

    Attributes
    ----------
    adjust_column : str
        Column containing the values to be adjusted.

    mappings : dict
        Dictionary of mappings for each column individually to be applied to the adjust_column.
        The dict passed to mappings in init is set to the mappings attribute.

    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its
        supported functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to
        polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    deprecated: bool
        indicates if class has been deprecated

    """

    polars_compatible = False

    lazyframe_compatible = False

    jsonable = False

    FITS = False

    deprecated = True

    def __init__(
        self,
        adjust_column: str,
        mappings: dict[str, dict],
        **kwargs: dict[str, bool],
    ) -> None:
        """Initialise the parent and enforce ordering for multi-column mappings.

        Parameters
        ----------
        adjust_column : str
            The column to be adjusted.

        mappings : dict or OrderedDict
            Dictionary containing adjustments. Each key is a column to base the adjustment on and
            each value is the adjustment dict for that column. For
            example the following dict {'a': {1: 'a', 3: 'b'}, 'b': {'a': 1, 'b': 2}}
            would replace the values in the adjustment column based off the values in column a using the mapping
            1->'a', 3->'b' and also replace based off the values in column b using a mapping 'a'->1, 'b'->2.
            If more than one column is defined for this mapping, then this object must be an OrderedDict
            to ensure reproducibility.

        **kwargs
            Arbitrary keyword arguments passed onto BaseTransformer.init method.

        Raises
        ------
        TypeError:
            if mappings contains more than one key and is not an OrderedDict.

        """
        super().__init__(mappings=mappings, adjust_column=adjust_column, **kwargs)

        uses_multiple_columns = len(mappings) > 1
        if uses_multiple_columns and not isinstance(mappings, OrderedDict):
            msg = f"{self.classname()}: mappings should be an ordered dict for 'replace' mappings using multiple columns"
            raise TypeError(msg)

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Replace values in the adjust column using the values provided in the mappings dict.

        Parameters
        ----------
        X : pd.DataFrame
            Data to apply adjustments to.

        Returns
        -------
        X : pd.DataFrame
            Transformed data X with adjustments applied to specified columns.

        """
        X = super().transform(X)

        target = self.adjust_column
        for source_col in self.columns:
            for level, replacement in self.mappings[source_col].items():
                # rows where source_col equals level take the replacement,
                # all other rows keep their current value
                row_matches = X[source_col] == level
                X[target] = np.where(row_matches, replacement, X[target])

        return X
[docs]
@deprecated(
    """This transformer has not been selected for conversion to polars/narwhals,
and so has been deprecated. If it is useful to you, please raise an issue
for it to be modernised
""",
)
class BaseCrossColumnNumericTransformer(BaseCrossColumnMappingTransformer):
    """Base class for cross column mapping transformers that make numeric adjustments.

    Attributes
    ----------
    adjust_column : str
        Column containing the values to be adjusted.

    mappings : dict
        Dictionary of mappings for each column individually to be applied to the adjust_column.
        The dict passed to mappings in init is set to the mappings attribute.

    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its
        supported functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to
        polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    deprecated: bool
        indicates if class has been deprecated

    """

    polars_compatible = False

    lazyframe_compatible = False

    FITS = False

    jsonable = False

    deprecated = True

    def __init__(
        self,
        adjust_column: str,
        mappings: dict[str, dict],
        **kwargs: dict[str, bool],
    ) -> None:
        """Initialise the parent and check every mapping value is numeric.

        Parameters
        ----------
        adjust_column : str
            The column to be adjusted.

        mappings : dict
            Dictionary containing adjustments. Exact structure will vary by child class.

        **kwargs
            Arbitrary keyword arguments passed onto BaseTransformer.init method.

        Raises
        ------
        TypeError:
            if any mapping value is not exactly of type int or float.

        """
        super().__init__(mappings=mappings, adjust_column=adjust_column, **kwargs)

        # exact type comparison: subclasses of int/float (e.g. bool) do not pass
        has_non_numeric_value = any(
            type(adjustment) not in {int, float}
            for column_mapping in mappings.values()
            for adjustment in column_mapping.values()
        )
        if has_non_numeric_value:
            msg = f"{self.classname()}: mapping values must be numeric"
            raise TypeError(msg)

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Run the parent transform and check the adjust column is numeric.

        Parameters
        ----------
        X : pd.DataFrame
            Data to apply adjustments to.

        Returns
        -------
        X : pd.DataFrame
            Output of the parent transform; no adjustment is applied here.

        Raises
        ------
        TypeError:
            if the adjust column does not have a numeric dtype

        """
        X = super().transform(X)

        if pd.api.types.is_numeric_dtype(X[self.adjust_column]):
            return X

        msg = f"{self.classname()}: variable {self.adjust_column} must have numeric dtype."
        raise TypeError(msg)
[docs]
@deprecated(
    """This transformer has not been selected for conversion to polars/narwhals,
and so has been deprecated. If it is useful to you, please raise an issue
for it to be modernised
""",
)
class CrossColumnMultiplyTransformer(BaseCrossColumnNumericTransformer):
    """Transformer to apply a multiplicative adjustment to values in one column based on the values of another column.

    Attributes
    ----------
    adjust_column : str
        Column containing the values to be adjusted.

    mappings : dict
        Dictionary of multiplicative adjustments for each column individually to be applied to the adjust_column.
        The dict passed to mappings in init is set to the mappings attribute.

    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its
        supported functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to
        polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    deprecated: bool
        indicates if class has been deprecated

    """

    polars_compatible = False

    lazyframe_compatible = False

    FITS = False

    jsonable = False

    deprecated = True

    def __init__(
        self,
        adjust_column: str,
        mappings: dict[str, dict],
        **kwargs: dict[str, bool],
    ) -> None:
        """Pass all arguments through to the parent initialiser.

        Parameters
        ----------
        adjust_column : str
            The column to be adjusted. The data type of this column must be int or float.

        mappings : dict
            Dictionary containing adjustments. Each key is a column to base the adjustment on and
            each value is the adjustment dict for that column. For
            example the following dict {'a': {1: 2, 3: 5}, 'b': {'a': 0.5, 'b': 1.1}}
            would multiply the values in the adjustment column based off the values in column a using the mapping
            1->2*value, 3->5*value and also multiply based off the values in column b using a mapping
            'a'->0.5*value, 'b'->1.1*value.
            The values within the dicts defining the multipliers must have type int or float.

        **kwargs
            Arbitrary keyword arguments passed onto BaseTransformer.init method.

        """
        super().__init__(mappings=mappings, adjust_column=adjust_column, **kwargs)

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Multiply values in the adjust column by the multipliers provided in the mappings dict.

        Parameters
        ----------
        X : pd.DataFrame
            Data to apply adjustments to.

        Returns
        -------
        X : pd.DataFrame
            Transformed data X with adjustments applied to specified columns.

        """
        X = super().transform(X)

        target = self.adjust_column
        for source_col in self.columns:
            for level, multiplier in self.mappings[source_col].items():
                # rows where source_col equals level are scaled,
                # all other rows keep their current value
                row_matches = X[source_col] == level
                X[target] = np.where(row_matches, X[target] * multiplier, X[target])

        return X
[docs]
@deprecated(
    """This transformer has not been selected for conversion to polars/narwhals,
and so has been deprecated. If it is useful to you, please raise an issue
for it to be modernised
""",
)
class CrossColumnAddTransformer(BaseCrossColumnNumericTransformer):
    """Transformer to apply an additive adjustment to values in one column based on the values of another column.

    Attributes
    ----------
    adjust_column : str
        Column containing the values to be adjusted.

    mappings : dict
        Dictionary of additive adjustments for each column individually to be applied to the adjust_column.
        The dict passed to mappings in init is set to the mappings attribute.

    built_from_json: bool
        indicates if transformer was reconstructed from json, which limits its
        supported functionality to .transform

    polars_compatible : bool
        class attribute, indicates whether transformer has been converted to
        polars/pandas agnostic narwhals framework

    jsonable: bool
        class attribute, indicates if transformer supports to/from_json methods

    FITS: bool
        class attribute, indicates whether transform requires fit to be run first

    lazyframe_compatible: bool
        class attribute, indicates whether transformer works with lazyframes

    deprecated: bool
        indicates if class has been deprecated

    """

    polars_compatible = False

    lazyframe_compatible = False

    FITS = False

    jsonable = False

    deprecated = True

    def __init__(
        self,
        adjust_column: str,
        mappings: dict[str, dict],
        **kwargs: dict[str, bool],
    ) -> None:
        """Pass all arguments through to the parent initialiser.

        Parameters
        ----------
        adjust_column : str
            The column to be adjusted. The data type of this column must be int or float.

        mappings : dict
            Dictionary containing adjustments. Each key is a column to base the adjustment on and
            each value is the adjustment dict for that column. For
            example the following dict {'a': {1: 2, 3: 5}, 'b': {'a': 1, 'b': -5}}
            would provide an additive adjustment to the values in the adjustment column based off the values
            in column a using the mapping 1->2+value, 3->5+value and also an additive adjustment based off the
            values in column b using a mapping 'a'->1+value, 'b'->(-5)+value.
            The values within the dicts defining the values to be added must have type int or float.

        **kwargs
            Arbitrary keyword arguments passed onto BaseTransformer.init method.

        """
        super().__init__(mappings=mappings, adjust_column=adjust_column, **kwargs)

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Add the amounts provided in the mappings dict to values in the adjust column.

        Parameters
        ----------
        X : pd.DataFrame
            Data to apply adjustments to.

        Returns
        -------
        X : pd.DataFrame
            Transformed data X with adjustments applied to specified columns.

        """
        X = super().transform(X)

        target = self.adjust_column
        for source_col in self.columns:
            for level, increment in self.mappings[source_col].items():
                # rows where source_col equals level are shifted,
                # all other rows keep their current value
                row_matches = X[source_col] == level
                X[target] = np.where(row_matches, X[target] + increment, X[target])

        return X