Skip to content

Anomaly handlers

cesnet_tszoo.utils.anomaly_handler.anomaly_handler

AnomalyHandler

Bases: ABC

Base class for anomaly handlers, used for handling anomalies in the data.

This class serves as the foundation for creating custom anomaly handlers. To implement a custom anomaly handler, this class is recommended to be subclassed and extended.

Example:

import numpy as np

class InterquartileRange(AnomalyHandler):
    """Example handler that clips values lying outside the 1.5 * IQR fences."""

    def __init__(self):
        # Per-field bounds, keyed by the field names of the structured array.
        self.lower_bound = {}
        self.upper_bound = {}

    def fit(self, data: np.ndarray) -> None:
        """Computes the lower and upper bound for every field of `data`."""

        # Silence numpy warnings (e.g. for all-NaN slices) only inside this block;
        # the previous warning filters are restored on exit, even on error.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            for name in data.dtype.names:
                current_data = data[name]

                q25, q75 = np.nanpercentile(current_data, [25, 75], axis=0)
                iqr = q75 - q25

                self.lower_bound[name] = q25 - 1.5 * iqr
                self.upper_bound[name] = q75 + 1.5 * iqr

    def transform_anomalies(self, data: np.ndarray):
        """Clips, in place, every value outside the fitted bounds to the closest bound."""

        for name in data.dtype.names:
            lower_bound = self.lower_bound[name]
            upper_bound = self.upper_bound[name]
            current_data = data[name]

            # Expand the bounds to the data shape so they can be indexed by the masks.
            lb_broadcast = np.broadcast_to(lower_bound, current_data.shape)
            ub_broadcast = np.broadcast_to(upper_bound, current_data.shape)

            mask_lower = current_data < lb_broadcast
            mask_upper = current_data > ub_broadcast

            current_data[mask_lower] = lb_broadcast[mask_lower]
            current_data[mask_upper] = ub_broadcast[mask_upper]
Source code in cesnet_tszoo\utils\anomaly_handler\anomaly_handler.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
class AnomalyHandler(ABC):
    """
    Base class for anomaly handlers, used for handling anomalies in the data.

    This class serves as the foundation for creating custom anomaly handlers. To implement a custom anomaly handler, subclass this class and implement
    both `fit` and `transform_anomalies`.

    Example:

        import warnings

        import numpy as np

        class InterquartileRange(AnomalyHandler):

            def __init__(self):
                self.lower_bound = {}
                self.upper_bound = {}

            def fit(self, data: np.ndarray) -> None:

                # Suppress numpy warnings only while fitting; filters are restored afterwards.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")

                    for name in data.dtype.names:
                        current_data = data[name]

                        q25, q75 = np.nanpercentile(current_data, [25, 75], axis=0)
                        iqr = q75 - q25

                        self.lower_bound[name] = q25 - 1.5 * iqr
                        self.upper_bound[name] = q75 + 1.5 * iqr

            def transform_anomalies(self, data: np.ndarray):

                for name in data.dtype.names:
                    lower_bound = self.lower_bound[name]
                    upper_bound = self.upper_bound[name]
                    current_data = data[name]

                    lb_broadcast = np.broadcast_to(lower_bound, current_data.shape)
                    ub_broadcast = np.broadcast_to(upper_bound, current_data.shape)

                    mask_lower = current_data < lb_broadcast
                    mask_upper = current_data > ub_broadcast

                    current_data[mask_lower] = lb_broadcast[mask_lower]
                    current_data[mask_upper] = ub_broadcast[mask_upper]

    """

    @abstractmethod
    def fit(self, data: np.ndarray) -> None:
        """
        Sets the anomaly handler values for a given time series part.

        This method must be implemented.

        Parameters:
            data: A structured numpy array representing data for a single time series with shape `(times)`. Use data["base_data"] to get non matrix features excluding any identifiers.
                  For matrix features use their name instead of base_data.
        """
        ...

    @abstractmethod
    def transform_anomalies(self, data: np.ndarray):
        """
        Transforms anomalies in the input data for a given time series part.

        This method must be implemented.
        Anomaly transformation is done in-place.

        Parameters:
            data: A structured numpy array representing data for a single time series with shape `(times)`. Use data["base_data"] to get non matrix features excluding any identifiers.
                  For matrix features use their name instead of base_data.

        Returns:
            The changed data, with the same shape and dtype as the input `(times)`.
            NOTE(review): the built-in handlers in this module only modify `data` in place and do not return anything - confirm whether callers rely on a return value.
        """
        ...

fit abstractmethod

fit(data: ndarray) -> None

Sets the anomaly handler values for a given time series part.

This method must be implemented.

Parameters:

Name Type Description Default
data ndarray

A structured numpy array representing data for a single time series with shape (times). Use data["base_data"] to get non matrix features excluding any identifiers. For matrix features use their name instead of base_data.

required
Source code in cesnet_tszoo\utils\anomaly_handler\anomaly_handler.py
58
59
60
61
62
63
64
65
66
67
68
69
@abstractmethod
def fit(self, data: np.ndarray) -> None:
    """
    Sets the anomaly handler values for a given time series part.

    This method must be implemented by every subclass.

    Parameters:
        data: A structured numpy array representing data for a single time series with shape `(times)`. Use data["base_data"] to get non matrix features excluding any identifiers.
              For matrix features use their name instead of base_data.
    """
    ...

transform_anomalies abstractmethod

transform_anomalies(data: ndarray)

Transforms anomalies in the input data for a given time series part.

This method must be implemented. Anomaly transformation is done in-place.

Parameters:

Name Type Description Default
data ndarray

A structured numpy array representing data for a single time series with shape (times). Use data["base_data"] to get non matrix features excluding any identifiers. For matrix features use their name instead of base_data.

required

Returns:

Type Description

The changed data, with the same shape and dtype as the input (times).

Source code in cesnet_tszoo\utils\anomaly_handler\anomaly_handler.py
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
@abstractmethod
def transform_anomalies(self, data: np.ndarray):
    """
    Transforms anomalies in the input data for a given time series part.

    This method must be implemented by every subclass.
    Anomaly transformation is done in-place.

    Parameters:
        data: A structured numpy array representing data for a single time series with shape `(times)`. Use data["base_data"] to get non matrix features excluding any identifiers.
              For matrix features use their name instead of base_data.

    Returns:
        The changed data, with the same shape and dtype as the input `(times)`.
        NOTE(review): whether callers use the return value is not visible here - confirm, since the transformation is documented as in-place.
    """
    ...

ZScore

Bases: AnomalyHandler

Fitting calculates the mean and standard deviation of the values used for fitting. The fitted mean and standard deviation are then used to calculate the z-score of every value; values whose z-score is above or below the threshold (3) are clipped to the threshold value.

Corresponds to enum AnomalyHandlerType.Z_SCORE or literal z-score.

Source code in cesnet_tszoo\utils\anomaly_handler\anomaly_handler.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
class ZScore(AnomalyHandler):
    """
    Fitting calculates the mean and standard deviation of the values used for fitting, separately for every field of the structured array.
    The fitted mean and standard deviation are then used to calculate the z-score of every value; values whose absolute z-score exceeds the threshold (3) are clipped to `mean ± threshold * std`.

    Corresponds to enum [`AnomalyHandlerType.Z_SCORE`][cesnet_tszoo.utils.enums.AnomalyHandlerType] or literal `z-score`.
    """

    def __init__(self):
        # Fitted statistics, keyed by the field names of the structured array.
        self.mean = {}
        self.std = {}
        # Maximum allowed absolute z-score before a value is clipped.
        self.threshold = 3

    def fit(self, data: np.ndarray) -> None:
        """
        Calculates the NaN-aware mean and standard deviation for every field of `data`.

        Parameters:
            data: A structured numpy array; statistics are computed along axis 0 of every field.
        """

        # Silence numpy warnings (e.g. for all-NaN slices) only inside this block.
        # The previous warning filters are restored on exit, even if an exception is raised,
        # instead of leaving a process-wide "always" filter behind.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            for name in data.dtype.names:
                self.mean[name] = np.nanmean(data[name], axis=0)
                self.std[name] = np.nanstd(data[name], axis=0)

    def transform_anomalies(self, data: np.ndarray):
        """
        Clips, in place, every value whose absolute z-score exceeds the threshold.

        Parameters:
            data: A structured numpy array with the same fields that were passed to `fit`.
        """

        for name in data.dtype.names:

            mean = self.mean[name]
            std = self.std[name]
            current_data = data[name].view()

            temp = current_data - mean
            # Where std is zero the division is skipped and the z-score stays 0,
            # so those values are never flagged as outliers.
            z_score = np.divide(temp, std, out=np.zeros_like(temp, dtype=float), where=std != 0)
            mask_outliers = np.abs(z_score) > self.threshold

            # Value located exactly `threshold` standard deviations from the mean,
            # on the same side as the original value.
            clipped_values = mean + np.sign(z_score) * self.threshold * std

            current_data[mask_outliers] = clipped_values[mask_outliers]

InterquartileRange

Bases: AnomalyHandler

Fitting calculates 25th percentile, 75th percentile from the values used for fitting. From those percentiles the interquartile range, lower and upper bound will be calculated. Lower and upper bounds will then be used for detecting anomalies (values below lower bound or above upper bound). Anomalies will then be clipped to closest bound.

Corresponds to enum AnomalyHandlerType.INTERQUARTILE_RANGE or literal interquartile_range.

Source code in cesnet_tszoo\utils\anomaly_handler\anomaly_handler.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
class InterquartileRange(AnomalyHandler):
    """
    Fitting calculates 25th percentile, 75th percentile from the values used for fitting. From those percentiles the interquartile range, lower and upper bound will be calculated.
    Lower and upper bounds will then be used for detecting anomalies (values below lower bound or above upper bound). Anomalies will then be clipped to closest bound.

    Corresponds to enum [`AnomalyHandlerType.INTERQUARTILE_RANGE`][cesnet_tszoo.utils.enums.AnomalyHandlerType] or literal `interquartile_range`.
    """

    def __init__(self):
        # Fitted bounds, keyed by the field names of the structured array.
        self.lower_bound = {}
        self.upper_bound = {}

    def fit(self, data: np.ndarray) -> None:
        """
        Calculates the lower and upper bound (`q25 - 1.5 * iqr`, `q75 + 1.5 * iqr`) for every field of `data`.

        Parameters:
            data: A structured numpy array; percentiles are computed NaN-aware along axis 0 of every field.
        """

        # Silence numpy warnings (e.g. for all-NaN slices) only inside this block.
        # The previous warning filters are restored on exit, even if an exception is raised,
        # instead of leaving a process-wide "always" filter behind.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            for name in data.dtype.names:
                current_data = data[name]

                q25, q75 = np.nanpercentile(current_data, [25, 75], axis=0)
                iqr = q75 - q25

                self.lower_bound[name] = q25 - 1.5 * iqr
                self.upper_bound[name] = q75 + 1.5 * iqr

    def transform_anomalies(self, data: np.ndarray):
        """
        Clips, in place, every value outside the fitted bounds to the closest bound.

        Parameters:
            data: A structured numpy array with the same fields that were passed to `fit`.
        """

        for name in data.dtype.names:
            lower_bound = self.lower_bound[name]
            upper_bound = self.upper_bound[name]
            current_data = data[name]

            # Expand the bounds to the data shape so they can be indexed by the masks.
            lb_broadcast = np.broadcast_to(lower_bound, current_data.shape)
            ub_broadcast = np.broadcast_to(upper_bound, current_data.shape)

            # Both masks are computed before any value is overwritten.
            mask_lower = current_data < lb_broadcast
            mask_upper = current_data > ub_broadcast

            current_data[mask_lower] = lb_broadcast[mask_lower]
            current_data[mask_upper] = ub_broadcast[mask_upper]

NoAnomalyHandler

Bases: AnomalyHandler

Does nothing.

Corresponds to enum AnomalyHandlerType.NO_ANOMALY_HANDLER or literal no_anomaly_handler.

Source code in cesnet_tszoo\utils\anomaly_handler\anomaly_handler.py
173
174
175
176
177
178
179
180
181
182
183
184
class NoAnomalyHandler(AnomalyHandler):
    """
    Does nothing.

    Corresponds to enum [`AnomalyHandlerType.NO_ANOMALY_HANDLER`][cesnet_tszoo.utils.enums.AnomalyHandlerType] or literal `no_anomaly_handler`.
    """

    def fit(self, data: np.ndarray) -> None:
        """Intentionally a no-op: nothing is learned from `data`."""

    def transform_anomalies(self, data: np.ndarray):
        """Intentionally a no-op: `data` is left untouched."""