Metis/app/service/time_series_detector/feature/classification_features.py

#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""

import numpy as np
import tsfresh.feature_extraction.feature_calculators as ts_feature_calculators


def time_series_autocorrelation(x):
    """
    Calculates the autocorrelation of the specified lag, according to the formula [1]

    .. math::

        \\frac{1}{(n-l)\sigma^{2}} \\sum_{t=1}^{n-l}(X_{t}-\\mu )(X_{t+l}-\\mu)

    where :math:`n` is the length of the time series :math:`X_i`, :math:`\sigma^2` its variance and :math:`\mu` its
    mean. `l` denotes the lag.

    .. rubric:: References

    [1] https://en.wikipedia.org/wiki/Autocorrelation#Estimation

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :param lag: the lag
    :type lag: int
    :return: the value of this feature
    :return type: float
    """
    lag = int((len(x) - 3) / 5)
    return ts_feature_calculators.autocorrelation(x, lag)


def time_series_coefficient_of_variation(x):
    """
    Calculates the coefficient of variation, mean value / square root of variation

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :return type: float
    """
    return np.mean(x) / np.sqrt(np.var(x))


def time_series_binned_entropy(x):
    """
    First bins the values of x into max_bins equidistant bins.
    Then calculates the value of

    .. math::

        - \\sum_{k=0}^{min(max\\_bins, len(x))} p_k log(p_k) \\cdot \\mathbf{1}_{(p_k > 0)}

    where :math:`p_k` is the percentage of samples in bin :math:`k`.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :param max_bins: the maximal number of bins
    :type max_bins: int
    :return: the value of this feature
    :return type: float
    """
    max_bins = [2, 4, 6, 8, 10, 20]
    result = []
    for value in max_bins:
        result.append(ts_feature_calculators.binned_entropy(x, value))
    return result

# add yourself classification features here...


def get_classification_features(x):
    classification_features = []
    classification_features.append(time_series_autocorrelation(x))
    classification_features.append(time_series_coefficient_of_variation(x))
    classification_features.extend(time_series_binned_entropy(x))
    # append yourself classification features here...

    return classification_features