Compare commits

..

No commits in common. "master" and "v0.1.0" have entirely different histories.

64 changed files with 451 additions and 1208 deletions

View File

@ -3,7 +3,7 @@
![](docs/images/Metis_logo.png)
[![license](http://img.shields.io/badge/license-BSD3-blue.svg)](https://github.com/tencent/Metis/master/LICENSE.TXT)
[![Release Version](https://img.shields.io/badge/release-0.2.0-red.svg)](https://github.com/tencent/Metis/releases)
[![Release Version](https://img.shields.io/badge/release-0.1.0-red.svg)](https://github.com/tencent/Metis/releases)
[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/tencent/Metis/pulls)
The name **Metis** is taken from Metis, the Greek goddess of wisdom. Metis is a collection of application practices in the AIOps field that mainly addresses intelligent operation and maintenance problems in terms of quality, efficiency and cost. The currently open-sourced component, a time series anomaly detection learnware, solves the anomaly detection problem of time series data from the perspective of machine learning.

View File

@ -3,7 +3,7 @@
![](docs/images/Metis_logo.png)
[![license](http://img.shields.io/badge/license-BSD3-blue.svg)](https://github.com/tencent/Metis/master/LICENSE.TXT)
[![Release Version](https://img.shields.io/badge/release-0.2.0-red.svg)](https://github.com/tencent/Metis/releases)
[![Release Version](https://img.shields.io/badge/release-0.1.0-red.svg)](https://github.com/tencent/Metis/releases)
[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/tencent/Metis/pulls)
**Metis** 这个名字取自希腊神话中的智慧女神墨提斯Metis它是一系列AIOps领域的应用实践集合。主要解决在质量、效率、成本方面的智能运维问题。当前版本开源的时间序列异常检测学件是从机器学习的角度来解决时序数据的异常检测问题。

View File

@ -1 +1 @@
__all__ = ["common", "dao", "service"]
__all__ = ["config", "controller", "dao", "model", "service", "utils"]

View File

@ -1 +0,0 @@
__all__ = ["common", "errorcode"]

1
app/config/__init__.py Normal file
View File

@ -0,0 +1 @@
__all__ = ["database", "common", "errorcode"]

View File

@ -8,10 +8,6 @@ https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""
import traceback
from functools import wraps
from errorcode import *
DEFAULT_WINDOW = 180
INPUT_LEN_ENG_MAX = 32
INPUT_LEN_CH_MAX = 64
@ -21,20 +17,3 @@ VALUE_LEN_MAX = 50000
UPLOAD_FILE = '/tmp/tmpfile_%s.csv'
MARK_POSITIVE = 1
MARK_NEGATIVE = 2
def build_ret_data(ret_code, data=""):
return {"code": ret_code, "msg": ERR_CODE[ret_code], "data": data}
def exce_service(func):
@wraps(func)
def wrapper(*args, **kwargs):
try:
ret_code, ret_data = func(*args, **kwargs)
return_dict = build_ret_data(ret_code, ret_data)
except Exception as ex:
traceback.print_exc()
return_dict = build_ret_data(THROW_EXP, str(ex))
return return_dict
return wrapper

View File

@ -32,4 +32,4 @@ ERR_CODE = {
READ_FEATURE_FAILED: "读取特征数据失败",
TRAIN_ERR: "训练出错",
LACK_SAMPLE: "缺少正样本或负样本"
}
}

View File

@ -1,16 +1,16 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import json
from functools import wraps
from django.shortcuts import render
from django.http import FileResponse
from render import render_json
from common.render import render_json
from functools import wraps
from app.service.time_series_detector.anomaly_service import *
from app.service.time_series_detector.sample_service import *
from app.service.time_series_detector.task_service import *
from app.service.time_series_detector.detect_service import *
from app.common.errorcode import *
from app.common.common import *
from app.config.errorcode import *
from app.utils.utils import *
def check_post(func):

View File

View File

@ -1 +1 @@
__all__ = ["db_common", "time_series_detector"]
__all__ = ["time_series_detector"]

View File

@ -1 +0,0 @@
__all__ = ["database"]

View File

@ -1 +1 @@
__all__ = ["anomaly_op", "sample_op", "train_op"]
__all__ = ["anomaly_op", "sample_op", "train_op"]

View File

@ -8,11 +8,13 @@ https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""
import time
import datetime
import MySQLdb
from app.dao.db_common import database
from app.config import database
from app.dao.time_series_detector.sample_op import *
from app.common.common import *
from app.common.errorcode import *
from app.config.common import *
from app.config.errorcode import *
class AbnormalOperation(object):

View File

@ -13,9 +13,9 @@ import uuid
import csv
import codecs
import MySQLdb
from app.dao.db_common import database
from app.common.common import *
from app.common.errorcode import *
from app.config import database
from app.config.common import *
from app.config.errorcode import *
class SampleOperation(object):

View File

@ -9,9 +9,9 @@ Unless required by applicable law or agreed to in writing, software distributed
"""
import MySQLdb
from app.dao.db_common import database
from app.common.common import *
from app.common.errorcode import *
from app.config import database
from app.config.common import *
from app.config.errorcode import *
class TrainOperation(object):

1
app/model/__init__.py Normal file
View File

@ -0,0 +1 @@
__all__ = ["time_series_detector"]

Binary file not shown.

View File

@ -1 +1 @@
__all__ = ["time_series_detector"]
__all__ = ["time_series_detector"]

View File

@ -1 +1 @@
__all__ = ["anomaly_service", "sample_service", "task_service", "detect_service"]
__all__ = ["algorithm", "feature", "anomaly_service", "sample_service", "task_service", "detect_service"]

View File

@ -8,9 +8,9 @@ https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""
from time_series_detector.algorithm import ewma
from time_series_detector.algorithm import polynomial_interpolation
from time_series_detector.common.tsd_common import *
from app.service.time_series_detector.algorithm import ewma
from app.service.time_series_detector.algorithm import polynomial_interpolation
from app.config.common import *
class EwmaAndPolynomialInterpolation(object):

View File

@ -13,12 +13,13 @@ import pickle
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.externals import joblib
from time_series_detector.feature import feature_service
from time_series_detector.common.tsd_common import *
from time_series_detector.common.tsd_errorcode import *
from app.service.time_series_detector.feature import feature_service
from app.utils.utils import *
from app.config.errorcode import *
from app.config.common import *
MODEL_PATH = os.path.join(os.path.dirname(__file__), '../model/')
MODEL_PATH = os.path.join(os.path.dirname(__file__), '../../../model/time_series_detector/')
DEFAULT_MODEL = MODEL_PATH + "gbdt_default_model"
@ -72,7 +73,7 @@ class Gbdt(object):
y_train = []
features = self.__calculate_features(data, window)
if features:
return TSD_LACK_SAMPLE
return LACK_SAMPLE
for index in features:
X_train.append(index[0])
y_train.append(index[1])
@ -84,8 +85,8 @@ class Gbdt(object):
model_name = MODEL_PATH + task_id + "_model"
joblib.dump(grd, model_name)
except Exception as ex:
return TSD_TRAIN_ERR, str(ex)
return TSD_OP_SUCCESS, ""
return TRAIN_ERR, str(ex)
return OP_SUCCESS, ""
def predict(self, X, window=DEFAULT_WINDOW, model_name=DEFAULT_MODEL):
"""

View File

@ -9,7 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed
"""
from sklearn.ensemble import IsolationForest
from time_series_detector.common.tsd_common import *
from app.config.common import *
class IForest(object):

View File

@ -12,7 +12,7 @@ import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from time_series_detector.common.tsd_common import *
from app.config.common import *
class PolynomialInterpolation(object):

View File

@ -10,10 +10,11 @@ Unless required by applicable law or agreed to in writing, software distributed
import os
import xgboost as xgb
from time_series_detector.feature import feature_service
from time_series_detector.common.tsd_errorcode import *
from time_series_detector.common.tsd_common import *
MODEL_PATH = os.path.join(os.path.dirname(__file__), '../model/')
from app.service.time_series_detector.feature import feature_service
from app.utils.utils import *
from app.config.errorcode import *
from app.config.common import *
MODEL_PATH = os.path.join(os.path.dirname(__file__), '../../../model/time_series_detector/')
DEFAULT_MODEL = MODEL_PATH + "xgb_default_model"
@ -75,7 +76,7 @@ class XGBoosting(object):
try:
f = open(feature_file_name, "w")
except Exception as ex:
return TSD_CAL_FEATURE_ERR, str(ex)
return CAL_FEATURE_ERR, str(ex)
times = 0
for temp in data:
if times > 0:
@ -85,7 +86,7 @@ class XGBoosting(object):
for x in result:
f.write(' ' + x)
times = times + 1
return TSD_OP_SUCCESS, ""
return OP_SUCCESS, ""
def __calculate_features(self, data, feature_file_name, window=DEFAULT_WINDOW):
"""
@ -105,7 +106,7 @@ class XGBoosting(object):
try:
ret_code, ret_data = self.__save_libsvm_format(features, feature_file_name)
except Exception as ex:
ret_code = TSD_CAL_FEATURE_ERR
ret_code = CAL_FEATURE_ERR
ret_data = str(ex)
return ret_code, ret_data
@ -120,12 +121,12 @@ class XGBoosting(object):
model_name = MODEL_PATH + task_id + "_model"
feature_file_name = MODEL_PATH + task_id + "_features"
ret_code, ret_data = self.__calculate_features(data, feature_file_name)
if ret_code != TSD_OP_SUCCESS:
if ret_code != OP_SUCCESS:
return ret_code, ret_data
try:
dtrain = xgb.DMatrix(feature_file_name)
except Exception as ex:
return TSD_READ_FEATURE_FAILED, str(ex)
return READ_FEATURE_FAILED, str(ex)
params = {
'max_depth': self.max_depth,
'eta': self.eta,
@ -142,8 +143,8 @@ class XGBoosting(object):
bst = xgb.train(params, dtrain, num_round)
bst.save_model(model_name)
except Exception as ex:
return TSD_TRAIN_ERR, str(ex)
return TSD_OP_SUCCESS, ""
return TRAIN_ERR, str(ex)
return OP_SUCCESS, ""
def predict(self, X, window=DEFAULT_WINDOW, model_name=DEFAULT_MODEL):
"""

View File

@ -10,6 +10,7 @@ Unless required by applicable law or agreed to in writing, software distributed
import json
from app.dao.time_series_detector.anomaly_op import *
from app.utils.utils import *
class AnomalyService(object):

View File

@ -14,12 +14,11 @@ import threading
from app.dao.time_series_detector import anomaly_op
from app.dao.time_series_detector import sample_op
from app.dao.time_series_detector import train_op
from time_series_detector.algorithm import xgboosting
from time_series_detector import detect
from app.common.errorcode import *
from app.common.common import *
from time_series_detector.common.tsd_errorcode import *
MODEL_PATH = os.path.join(os.path.dirname(__file__), './model/')
from app.utils.utils import *
from app.service.time_series_detector.algorithm import isolation_forest, ewma, polynomial_interpolation, statistic, xgboosting
from app.config.errorcode import *
from app.config.common import *
MODEL_PATH = os.path.join(os.path.dirname(__file__), '../../model/time_series_detector/')
class DetectService(object):
@ -27,7 +26,11 @@ class DetectService(object):
def __init__(self):
self.sample_op_obj = sample_op.SampleOperation()
self.anomaly_op_obj = anomaly_op.AbnormalOperation()
self.detect_obj = detect.Detect()
self.iforest_obj = isolation_forest.IForest()
self.ewma_obj = ewma.Ewma()
self.polynomial_obj = polynomial_interpolation.PolynomialInterpolation()
self.statistic_obj = statistic.Statistic()
self.supervised_obj = xgboosting.XGBoosting()
def __generate_model(self, data, task_id):
"""
@ -120,35 +123,87 @@ class DetectService(object):
return True
def __check_param(self, data):
if ("viewName" not in data.keys()) or ("viewId" not in data.keys()) or ("attrId" not in data.keys()) or ("attrName" not in data.keys()) or ("time" not in data.keys()) or ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()):
if ("viewName" not in data.keys()) or ("attrId" not in data.keys()) or ("attrName" not in data.keys()) or ("time" not in data.keys()) or ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()):
return CHECK_PARAM_FAILED, "missing parameter"
if not data['dataA']:
return CHECK_PARAM_FAILED, "dataA can not be empty"
if not data['dataB']:
return CHECK_PARAM_FAILED, "dataB can not be empty"
if not data['dataC']:
return CHECK_PARAM_FAILED, "dataC can not be empty"
if not self.__list_is_digit(data['dataA'].split(',')):
return CHECK_PARAM_FAILED, "dataA contains illegal numbers"
if not self.__list_is_digit(data['dataB'].split(',')):
return CHECK_PARAM_FAILED, "dataB contains illegal numbers"
if not self.__list_is_digit(data['dataC'].split(',')):
return CHECK_PARAM_FAILED, "dataC contains illegal numbers"
if "window" in data:
window = data["window"]
else:
window = DEFAULT_WINDOW
if len(data['dataC'].split(',')) != (2 * window + 1):
return CHECK_PARAM_FAILED, "dataC length does not match"
if len(data['dataB'].split(',')) != (2 * window + 1):
return CHECK_PARAM_FAILED, "dataB length does not match"
if len(data['dataA'].split(',')) != (window + 1):
return CHECK_PARAM_FAILED, "dataA length does not match"
return OP_SUCCESS, ""
def value_predict(self, data):
ret_code, ret_data = self.__check_param(data)
if ret_code != OP_SUCCESS:
return build_ret_data(ret_code, ret_data)
ret_code, ret_data = self.detect_obj.value_predict(data)
if ret_code == TSD_OP_SUCCESS and ret_data["ret"] == 0:
anomaly_params = {
"view_id": data["viewId"],
"view_name": data["viewName"],
"attr_id": data["attrId"],
"attr_name": data["attrName"],
"time": data["time"],
"data_c": data["dataC"],
"data_b": data["dataB"],
"data_a": data["dataA"]
}
self.anomaly_op_obj.insert_anomaly(anomaly_params)
return build_ret_data(ret_code, ret_data)
"""
Predict the data
def rate_predict(self, data):
:param data: the time series to detect of
"""
ret_code, ret_data = self.__check_param(data)
if ret_code != OP_SUCCESS:
return build_ret_data(ret_code, ret_data)
ret_data, ret_data = self.detect_obj.rate_predict(data)
if ret_code == TSD_OP_SUCCESS and ret_data["ret"] == 0:
if "taskId" in data and data["taskId"]:
model_name = MODEL_PATH + data["taskId"] + "_model"
else:
model_name = MODEL_PATH + "xgb_default_model"
combined_data = data["dataC"] + "," + data["dataB"] + "," + data["dataA"]
time_series = map(int, combined_data.split(','))
if "window" in data:
window = data["window"]
else:
window = DEFAULT_WINDOW
statistic_result = self.statistic_obj.predict(time_series)
ewma_result = self.ewma_obj.predict(time_series)
polynomial_result = self.polynomial_obj.predict(time_series, window)
iforest_result = self.iforest_obj.predict(time_series, window)
if statistic_result == 0 or ewma_result == 0 or polynomial_result == 0 or iforest_result == 0:
xgb_result = self.supervised_obj.predict(time_series, window, model_name)
res_value = xgb_result[0]
prob = xgb_result[1]
else:
res_value = 1
prob = 1
ret_data = {"ret": res_value, "p": str(prob)}
if ret_data["ret"] == 0:
anomaly_params = {
"view_id": data["viewId"],
"view_name": data["viewName"],
"attr_id": data["attrId"],
"attr_name": data["attrName"],
"time": data["time"],
"data_c": data["dataC"],
"data_b": data["dataB"],
"data_a": data["dataA"]
}
self.anomaly_op_obj.insert_anomaly(anomaly_params)
return build_ret_data(OP_SUCCESS, ret_data)
def rate_predict(self, data):
combined_data = data["dataC"] + "," + data["dataB"] + "," + data["dataA"]
time_series = map(float, combined_data.split(','))
statistic_result = self.statistic_obj.predict(time_series)
if statistic_result == 0:
prob = 0
else:
prob = 1
ret_data = {"ret": statistic_result, "p": str(prob)}
if ret_data["ret"] == 0:
anomaly_params = {
"view_id": data["viewId"],
"view_name": data["viewName"],

View File

@ -0,0 +1,88 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""
import numpy as np
import tsfresh.feature_extraction.feature_calculators as ts_feature_calculators
def time_series_autocorrelation(x):
    r"""
    Calculate the autocorrelation of x at a lag derived from its length, according to the formula [1]

    .. math::

        \frac{1}{(n-l)\sigma^{2}} \sum_{t=1}^{n-l}(X_{t}-\mu )(X_{t+l}-\mu)

    where :math:`n` is the length of the time series :math:`X_i`, :math:`\sigma^2` its variance and :math:`\mu` its
    mean. `l` denotes the lag.

    The lag is not a parameter: it is computed as int((len(x) - 3) / 5), so for a series of
    5 * window + 3 points it equals window.

    .. rubric:: References

    [1] https://en.wikipedia.org/wiki/Autocorrelation#Estimation

    :param x: the time series to calculate the feature of
    :return: the value returned by tsfresh's autocorrelation calculator for x and the derived lag
    """
    # The docstring is a raw string so that the LaTeX backslashes (\sigma, \mu, ...) are not
    # parsed as (invalid) escape sequences.
    lag = int((len(x) - 3) / 5)
    return ts_feature_calculators.autocorrelation(x, lag)
def time_series_coefficient_of_variation(x):
    """
    Return the mean of x divided by the square root of its variance.

    The variance comes from np.var with its default arguments. No guard is applied for a zero
    variance; the division is left to numpy.

    :param x: the time series to calculate the feature of
    :return: mean(x) / sqrt(var(x))
    """
    average = np.mean(x)
    deviation = np.sqrt(np.var(x))
    return average / deviation
def time_series_binned_entropy(x):
    r"""
    Compute the binned entropy of x once for each of the bin counts 2, 4, 6, 8, 10 and 20.

    Each value is obtained from tsfresh's binned_entropy calculator, which is described as first
    binning the values of x into max_bins equidistant bins and then calculating

    .. math::

        - \sum_{k=0}^{min(max\_bins, len(x))} p_k log(p_k) \cdot \mathbf{1}_{(p_k > 0)}

    where :math:`p_k` is the percentage of samples in bin :math:`k`.

    :param x: the time series to calculate the feature of
    :return: one entropy value per bin count, in the order 2, 4, 6, 8, 10, 20
    :return type: list
    """
    bin_counts = [2, 4, 6, 8, 10, 20]
    return [ts_feature_calculators.binned_entropy(x, bins) for bins in bin_counts]
# add your own classification features here...
def get_classification_features(x):
    """
    Collect every classification feature of x into one flat list.

    The list holds, in order: the autocorrelation feature, the coefficient-of-variation feature,
    and then the binned-entropy values.

    :param x: the time series to calculate the features of
    :return: the classification features
    :return type: list
    """
    features = [time_series_autocorrelation(x)]
    features.append(time_series_coefficient_of_variation(x))
    features.extend(time_series_binned_entropy(x))
    # append your own classification features here...
    return features

View File

@ -11,7 +11,7 @@ Unless required by applicable law or agreed to in writing, software distributed
import statistical_features
import classification_features
import fitting_features
from time_series_detector.common import tsd_common
from app.utils import utils
def extract_features(time_series, window):
@ -25,19 +25,18 @@ def extract_features(time_series, window):
:return: the value of features
:return type: list with float
"""
if not tsd_common.is_standard_time_series(time_series, window):
if not utils.is_standard_time_series(time_series, window):
# add your report of this error here...
return []
# spilt time_series
split_time_series = tsd_common.split_time_series(time_series, window)
split_time_series = utils.split_time_series(time_series, window)
# nomalize time_series
normalized_split_time_series = tsd_common.normalize_time_series(split_time_series)
max_min_normalized_time_series = tsd_common.normalize_time_series_by_max_min(split_time_series)
normalized_split_time_series = utils.normalize_time_series(split_time_series)
s_features = statistical_features.get_statistical_features(normalized_split_time_series[4])
f_features = fitting_features.get_fitting_features(normalized_split_time_series)
c_features = classification_features.get_classification_features(max_min_normalized_time_series)
c_features = classification_features.get_classification_features(normalized_split_time_series[0] + normalized_split_time_series[1][1:] + normalized_split_time_series[2] + normalized_split_time_series[3][1:] + normalized_split_time_series[4])
# combine features with types
features = s_features + f_features + c_features
return features

View File

@ -9,7 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed
"""
import numpy as np
from time_series_detector.common.tsd_common import *
from app.config.common import *
def time_series_moving_average(x):
@ -49,7 +49,7 @@ def time_series_weighted_moving_average(x):
for w in range(1, min(50, DEFAULT_WINDOW), 5):
w = min(len(x), w) # avoid the case len(value_list) < w
coefficient = np.array(range(1, w + 1))
temp_list.append((np.dot(coefficient, x[-w:])) / float(w * (w + 1) / 2))
temp_list.append((np.dot(coefficient, x[-w:])) / (w * (w + 1) / 2))
return list(np.array(temp_list) - x[-1])
@ -210,11 +210,6 @@ def time_series_periodic_features(data_c_left, data_c_right, data_b_left, data_b
periodic_features.append(-1)
else:
periodic_features.append(1)
step = DEFAULT_WINDOW / 6
for w in range(1, DEFAULT_WINDOW, step):
periodic_features.append(min(max(data_a[w - 1:w + step]) - data_a[-1], 0))
periodic_features.append(max(min(data_a[w - 1:w + step]) - data_a[-1], 0))
return periodic_features
# add yourself fitting features here...

View File

@ -12,8 +12,9 @@ import json
import traceback
import csv
from app.dao.time_series_detector.sample_op import *
from app.common.errorcode import *
from app.common.common import *
from app.config.errorcode import *
from app.utils.utils import *
from app.config.common import *
class SampleService(object):

View File

@ -10,8 +10,8 @@ Unless required by applicable law or agreed to in writing, software distributed
import json
from app.dao.time_series_detector.train_op import *
from app.common.errorcode import *
from app.common.common import *
from app.config.errorcode import *
from app.utils.utils import *
class TrainService(object):

View File

@ -4,16 +4,16 @@ SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
DROP TABLE IF EXISTS `anomaly`;
CREATE TABLE `anomaly` (
`id` int(10) NOT NULL AUTO_INCREMENT,
`view_id` varchar(31) NOT NULL DEFAULT '' comment '指标集id',
`view_name` varchar(63) NOT NULL DEFAULT '' comment '指标集名',
`attr_id` varchar(31) NOT NULL DEFAULT '' comment '指标id',
`attr_name` varchar(63) NOT NULL DEFAULT '' comment '指标名',
`time` datetime DEFAULT NULL comment '数据时间',
`data_c` text,
`data_b` text,
`data_a` text,
`mark_flag` tinyint(1) NOT NULL DEFAULT 0 comment '0:没有打标、1:打标为正样本、2:打标为负样本',
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`view_id` varchar(31) DEFAULT NULL,
`view_name` varchar(63) DEFAULT NULL,
`attr_id` varchar(31) DEFAULT NULL,
`attr_name` varchar(63) DEFAULT NULL,
`time` datetime DEFAULT NULL,
`data_c` text NOT NULL,
`data_b` text NOT NULL,
`data_a` text NOT NULL,
`mark_flag` int(1) NOT NULL DEFAULT '0',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

View File

@ -4,21 +4,21 @@ SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
DROP TABLE IF EXISTS `sample_dataset`;
CREATE TABLE `sample_dataset` (
`id` int(10) NOT NULL AUTO_INCREMENT,
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '样本更新时间',
`view_id` varchar(31) NOT NULL DEFAULT '' comment '指标集id',
`view_name` varchar(63) NOT NULL DEFAULT '' comment '指标名',
`attr_name` varchar(63) NOT NULL DEFAULT '' comment '指标名',
`attr_id` varchar(31) NOT NULL DEFAULT '' comment '指标id',
`source` varchar(31) NOT NULL DEFAULT '' comment '样本来源',
`train_or_test` varchar(10) NOT NULL DEFAULT '' comment 'test测试样本、train:训练样本',
`positive_or_negative` varchar(20) NOT NULL DEFAULT '' comment 'positive:正样本、negative负样本',
`window` int(10) NOT NULL DEFAULT 0 comment '窗口值目前支持180',
`data_time` int(10) DEFAULT NULL comment '样本时间',
`id` bigint(10) NOT NULL AUTO_INCREMENT,
`update_time` timestamp NULL DEFAULT NULL,
`view_id` varchar(31) DEFAULT NULL,
`view_name` varchar(63) DEFAULT NULL,
`attr_name` varchar(63) DEFAULT NULL,
`attr_id` varchar(31) DEFAULT NULL,
`source` varchar(31) DEFAULT NULL,
`train_or_test` varchar(31) DEFAULT NULL,
`positive_or_negative` varchar(31) DEFAULT NULL,
`window` int(2) DEFAULT NULL,
`data_time` int(11) DEFAULT NULL,
`data_c` text,
`data_b` text,
`data_a` text,
`anomaly_id` int(10) DEFAULT NULL comment '标识从anomaly里插入的样本',
`anomaly_id` bigint(10) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

View File

@ -1,24 +1,25 @@
SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
-- Table structure for `train_task`
-- ----------------------------
DROP TABLE IF EXISTS `train_task`;
CREATE TABLE `train_task` (
`id` int(10) NOT NULL AUTO_INCREMENT,
`task_id` varchar(20) NOT NULL DEFAULT '' comment '训练任务id',
`sample_num` int(10) NOT NULL DEFAULT 0 comment '训练总样本数',
`postive_sample_num` int(10) NOT NULL DEFAULT 0 comment '训练正样本数',
`negative_sample_num` int(10) NOT NULL DEFAULT 0 comment '训练负样本数',
`window` int(10) NOT NULL DEFAULT 0 comment '窗口值目前支持180',
`model_name` varchar(20) NOT NULL DEFAULT '' comment '模型名',
`source` varchar(255) NOT NULL DEFAULT '' comment '样本来源',
`start_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '训练任务开始时间',
`end_time` timestamp NULL DEFAULT NULL comment '训练任务结束时间',
`status` varchar(11) NOT NULL DEFAULT '' comment 'complete:任务完成、running:任务正在运行、failed任务失败',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
-- ----------------------------
-- Records of train_task
-- ----------------------------
INSERT INTO `train_task` VALUES ('1', '1535790960079', '90675', '45228', '45447', '180', 'xgb_default_model', 'Metis', '2018-09-01 16:36:00', '2018-09-01 16:45:40', 'complete');
SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
-- Table structure for `train_task`
-- ----------------------------
DROP TABLE IF EXISTS `train_task`;
CREATE TABLE `train_task` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`task_id` char(255) DEFAULT NULL,
`sample_num` int(11) DEFAULT NULL,
`postive_sample_num` int(11) DEFAULT NULL,
`negative_sample_num` int(11) DEFAULT NULL,
`window` int(2) DEFAULT NULL,
`model_name` varchar(20) DEFAULT NULL,
`source` varchar(255) DEFAULT NULL,
`start_time` timestamp NULL DEFAULT NULL,
`end_time` timestamp NULL DEFAULT NULL,
`status` varchar(11) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `id` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
-- ----------------------------
-- Records of train_task
-- ----------------------------
INSERT INTO `train_task` VALUES ('1', '1535790960079', '90675', '45228', '45447', '180', 'xgb_default_model', 'Metis', '2018-09-01 16:36:00', '2018-09-01 16:45:40', 'complete');

1
app/utils/__init__.py Normal file
View File

@ -0,0 +1 @@
__all__ = ["utils"]

View File

@ -1,97 +1,99 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""
import numpy as np
DEFAULT_WINDOW = 180
def is_standard_time_series(time_series, window=DEFAULT_WINDOW):
    """
    Tell whether time_series has the layout expected for the given window.

    The series must hold exactly 5 * window + 3 points (903 when window is 180), and the mean of
    the points from index 4 * window + 2 onwards must be larger than 0.

    :param time_series: the time series to check, like [data_c, data_b, data_a]
    :param window: the length of window
    :return: True or False
    :return type: boolean
    """
    expected_length = 5 * window + 3
    if len(time_series) != expected_length:
        # Wrong length: the mean is never evaluated, as in a short-circuited `and`.
        return False
    last_part = time_series[(4 * window + 2):]
    return bool(np.mean(last_part) > 0)
def split_time_series(time_series, window=DEFAULT_WINDOW):
    """
    Split the time_series into five parts.

    For an input of 5 * window + 3 points each part has window + 1 points. The left and right
    parts of data_c, and likewise of data_b, share their boundary point.

    :param time_series: [data_c, data_b, data_a]
    :param window: the length of window
    :return: split list [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]]
    """
    # (start, stop) slice bounds of each part; a stop of None means "to the end".
    bounds = [
        (0, window + 1),
        (window, 2 * window + 1),
        (2 * window + 1, 3 * window + 2),
        (3 * window + 1, 4 * window + 2),
        (4 * window + 2, None),
    ]
    return [time_series[start:stop] for start, stop in bounds]
def normalize_time_series(split_time_series):
    """
    Normalize the split_time_series by the mean of its last part.

    When the mean of split_time_series[4] is larger than 1, each of the five parts is divided by
    that mean and returned as a list. Otherwise the five parts are returned unchanged.

    :param split_time_series: [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]]
    :return: list of the five (possibly normalized) parts, in the same order
    """
    scale = np.mean(split_time_series[4])
    if scale > 1:
        return [list(split_time_series[index] / scale) for index in range(5)]
    # Mean not larger than 1: hand back the original parts untouched.
    return [split_time_series[index] for index in range(5)]
def normalize_time_series_by_max_min(split_time_series):
    """
    Rebuild one series from the split parts and scale it with max-min normalization.

    The parts are concatenated, dropping the first point of the two "right" parts (indexes 1
    and 3). If the series is constant (max equals min) a list of zeros is returned.

    :param split_time_series: [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]]
    :return: max_min_normalized time_series
    """
    merged = (
        split_time_series[0]
        + split_time_series[1][1:]
        + split_time_series[2]
        + split_time_series[3][1:]
        + split_time_series[4]
    )
    highest = np.max(merged)
    lowest = np.min(merged)
    spread = highest - lowest
    if spread > 0:
        return list((np.array(merged) - lowest) / float(spread))
    return [0.0] * len(merged)
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""
import numpy as np
import traceback
from functools import wraps
from app.config.errorcode import *
from app.config.common import *
def is_standard_time_series(time_series, window=DEFAULT_WINDOW):
    """
    Tell whether time_series has the layout the detectors expect.

    Two conditions must hold: the length equals 5 * window + 3 (903 points
    for window = 180), and the mean of the trailing part, starting at index
    4 * window + 2, is greater than 0.

    :param time_series: the time series to check, like [data_c, data_b, data_a]
    :param window: the length of window
    :return: True or False
    :return type: boolean
    """
    if len(time_series) != 5 * window + 3:
        return False
    latest_part = time_series[(4 * window + 2):]
    return bool(np.mean(latest_part) > 0)
def split_time_series(time_series, window=DEFAULT_WINDOW):
    """
    Split the time_series into five parts.

    The first four parts hold window + 1 points each; every "right" part
    starts on the last point of the matching "left" part. The fifth part is
    whatever follows index 4 * window + 2.

    :param time_series: [data_c, data_b, data_a]
    :param window: the length of window
    :return: split list [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]]
    """
    part_len = window + 1
    block_len = 2 * window + 1  # offset at which the data_b portion starts
    bounds = [
        (0, part_len),                          # data_c_left
        (window, block_len),                    # data_c_right
        (block_len, block_len + part_len),      # data_b_left
        (block_len + window, 2 * block_len),    # data_b_right
    ]
    pieces = [time_series[start:stop] for start, stop in bounds]
    pieces.append(time_series[(2 * block_len):])  # data_a
    return pieces
def normalize_time_series(split_time_series):
    """
    Scale every part by the mean of the last part (data_a).

    Scaling only happens when that mean is greater than 1; otherwise the
    parts are handed back untouched (same objects, in a new outer list).
    The division is applied to each part as a whole, so the parts must
    support ``part / number`` (e.g. numpy arrays).

    :param split_time_series: [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]]
    :return: list of the five parts, each divided by mean(data_a) when that mean > 1
    """
    scale = np.mean(split_time_series[4])
    if scale > 1:
        return [list(split_time_series[idx] / scale) for idx in range(5)]
    return [split_time_series[idx] for idx in range(5)]
def build_ret_data(ret_code, data=""):
    """
    Build the response dictionary for a return code.

    :param ret_code: return code; must be a key of ERR_CODE
    :param data: payload stored under "data"
    :return: dict with the keys "code", "msg" (looked up in ERR_CODE) and "data"
    """
    response = dict(code=ret_code, msg=ERR_CODE[ret_code], data=data)
    return response
def exce_service(func):
    """
    Decorator that converts a service result into a response dictionary.

    The wrapped function must return a ``(ret_code, ret_data)`` pair, which is
    passed to build_ret_data. Any exception raised while calling the function
    or building the response is printed with its traceback and reported as a
    THROW_EXP response carrying the exception text.

    :param func: the service function to wrap
    :return: the wrapping function
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            code, payload = func(*args, **kwargs)
            return build_ret_data(code, payload)
        except Exception as ex:
            traceback.print_exc()
            return build_ret_data(THROW_EXP, str(ex))
    return wrapper

View File

@ -1,26 +0,0 @@
# ChangeLog
## [2018-11-07, Version v0.2.0](https://github.com/Tencent/Metis/releases/tag/v0.2.0)
### Bug fixes
- [[```48618f59```](https://github.com/Tencent/Metis/commit/48618f59cb70249cba2d01d5413cbb4eea418721)] __-__ __docker__: start.sh dos2unix (lxd1190)
- [[```86076d84```](https://github.com/Tencent/Metis/commit/86076d843fab582c46728a7916a81aef7f1b78c3)] __-__ __docker__: update docker image (lxd1190)
### Code Refactoring
- [[```21b2b161```](https://github.com/Tencent/Metis/commit/21b2b1614d91eec1fc2fb07c6628f6a3868523e7)] __-__ __app__: refactor dictionary: add app module (lxd1190)
- [[```5faf04cf```](https://github.com/Tencent/Metis/commit/5faf04cf06643f7c9f3833daba7c81a31c028eef)] __-__ __app__: refactor dictionary (lxd1190)
### Other commits
- [[```e3167d25```](https://github.com/Tencent/Metis/commit/e3167d25c92cb9c852cdd5100de61c30f62ce9d5)] __-__ __docs(architecture docs)__ : update docs content (lxd1190)
- [[```86569e65```](https://github.com/Tencent/Metis/commit/86569e65bc4f5717fdd35c7511347f6e129f109d)] __-__ __docs(code_framework)__ : update arch picture and code description (lxd1190)
- [[```c739f92c```](https://github.com/Tencent/Metis/commit/c739f92ca6def3e37c75641c0bf22e41eb4e3c11)] __-__ __docs(install)__ : update db config path (lxd1190)

View File

@ -12,5 +12,5 @@ docker rm -f metis-web
docker run --net=host --name=metis-db -d -p 3306:3306 -v /data/metis/mysql/:/var/lib/mysql:Z -e MYSQL_ROOT_PASSWORD=metis@123 zhiyunmetis/metis-db
sleep 6
docker run --net=host --name=metis-svr -d -p 8080:8080 -v /data/metis/model/:/metis/time_series_detector/model:Z zhiyunmetis/metis-svr /bin/sh /metis/init.sh
docker run --net=host --name=metis-web -d -p 80:80 zhiyunmetis/metis-web /bin/sh /metis/init.sh ${ip}
docker run --net=host --name=metis-svr -d -p 8080:8080 -v /data/metis/module/:/metis/app/model/time_series_detector:Z zhiyunmetis/metis-svr /bin/sh /metis/init.sh
docker run --net=host --name=metis-web -d -p 80:80 zhiyunmetis/metis-web /bin/sh /metis/init.sh ${ip}

View File

@ -1,22 +1,20 @@
# API接口文档
## 时间序列异常检测接口
## 时间序列异常检测学件接口
用户可根据场景选择使用API接口对时间序列进行异常检测
用户可使用API接口对时间序列进行异常检测检测后的结果通过WEB管理端查看和管理。 服务端提供两个异常检测的API接口供不同场景调用
1、量值检测适用于大多数KPI指标数据的检测,使用无监督和有监督联合检测,会加载检测模型
1、量值检测适用于大多数类型数据的检测,使用无监督和有监督联合检测,会加载检测模型
2、率值检测适用于正态分布类型数据的检测使用无监督算法进行检测如成功率等生死指标数据的检测
- HTTP接口调用请使用搭建的后端服务地址Python接口可直接调用
- API请求调用请使用搭建的后端服务地址
- 当前检测时间窗口选取为3小时每分钟1个数据点即窗口值为180
- 同比数据日期和时间段的选择可根据实际情况调整,文档中两个同比数据分别取昨日和一周前的同比
针对当前一个值的检测,需要依赖过去三段数据,数据选取规则参考示例图:
![data_info](images/data_info.png)
### 一、HTTP接口
#### 1、量值检测
### 1、量值检测
* API POST /{ip}:{port}/PredictValue
* 功能说明:根据参考数据检测最近一个数据点是否异常
@ -75,7 +73,7 @@
| ret | int | 检测结果是否异常。0:异常1:正常 |
| p | string | 概率值值越小判定为异常的置信度越高目前p<0.15判决为异常 |
#### 2、率值检测
### 2、率值检测
* API POST /{ip}:{port}/PredictRate
* 功能说明:根据参考数据检测最近一个数据点是否异常
@ -131,404 +129,4 @@
| code | int | 返回码。0:成功非0:失败 |
| msg | string | 返回消息 |
| ret | int | 检测结果是否异常。0:异常1:正常 |
| p | string | 概率值,值越小,判定为异常的置信度越高 |
### 二、Python API
Metis工程目录下time_series_detector目录为时间序列异常检测学件可以在python代码中直接调用
#### 1、量值检测
* 功能说明:根据参考数据检测最近一个数据点是否异常
* 调用方法:
```
# Python
from time_series_detector import detect
detect_obj = detect.Detect()
detect_obj.value_predict(data)
```
* 传入参数python字典
```
{
"window":180,
"dataC":"9,10,152,...,255,...,16",
"dataB":"9,10,152,...,255,...,18",
"dataA":"9,10,152,...,458"
}
```
* 传入参数说明:
| 名称 | 类型 |必填| 默认值 | 说明 |
| --- | --- | --- |---- | --- |
| taskId| string| 否| 无|使用的检测模型,如不传,则采用系统默认模型|
| window| int| 否| 无|窗口值目前支持180|
| dataC| string| 是| 无|待检测的1个点对应一周前同时刻的点 + 前后各180个数据361个数据点按时间顺序拼接英文逗号分隔|
| dataB| string| 是| 无|待检测的1个点对应昨日同时刻的点 + 前后各180个数据361个数据点按时间顺序拼接英文逗号分隔|
| dataA| string| 是| 无|待检测的1个点+前180个数据共181个数据点181个数据点按时间顺序拼接英文逗号分隔|
* 返回参数:
```
code, {
"ret":0,
"p":"0.05",
}
```
* 返回参数说明:
| 名称 | 类型 | 说明 |
|---|---|---|
| code | int | 返回码。0:成功非0:失败 |
| ret | int | 检测结果是否异常。0:异常1:正常 |
| p | string | 概率值值越小判定为异常的置信度越高目前p<0.15判决为异常 |
* 调用案例:
![data_info](images/python_api_value_predict.png)
#### 2、率值检测
* 功能说明:根据参考数据检测最近一个数据点是否异常
* 调用方法:
```
# Python
from time_series_detector import detect
detect_obj = detect.Detect()
detect_obj.rate_predict(data)
```
* 传入参数python字典
```
{
"dataC":"9,10,152,...,255,...,16",
"dataB":"9,10,152,...,255,...,18",
"dataA":"9,10,152,...,458"
}
```
* 传入参数说明:
| 名称 | 类型 |必填| 默认值 | 说明 |
| --- | --- | --- |--- | --- |
| dataC| string| 是| 无|待检测的1个点对应一周前同时刻的点 + 前后各180个数据361个数据点按时间顺序拼接英文逗号分隔|
| dataB| string| 是| 无|待检测的1个点对应昨日同时刻的点 + 前后各180个数据361个数据点按时间顺序拼接英文逗号分隔|
| dataA| string| 是| 无|待检测的1个点+前180个数据共181个数据点181个数据点按时间顺序拼接英文逗号分隔|
* 返回参数:
```
code, {
"ret":0,
"p":"0",
}
```
* 返回参数说明:
| 名称 | 类型 | 说明 |
|---|---|---|
| code | int | 返回码。0:成功非0:失败 |
| ret | int | 检测结果是否异常。0:异常1:正常 |
| p | string | 概率值值越小判定为异常的置信度越高目前p<0.15判决为异常 |
* 调用案例:
![data_info](images/python_api_rate_predict.png)
### 三、LIB库
Metis工程目录下time_series_detector/lib为学件动态库目录库文件可以在代码中加载调用
libdetect.so目前支持在CentOs7.2+系统环境下使用
#### Python代码中调用:
##### 1、量值检测
* 功能说明:根据参考数据检测最近一个数据点是否异常
* 调用方法:
加载so库
```
# Python
so = cdll.LoadLibrary
metis_lib = so("./libdetect.so")
handle = metis_lib.load_model("./xgb_default_model")
```
构造传入数据:
```
# Python
from ctypes import *
class ValueData(Structure):
_fields_ = [('data_a', POINTER(c_int)), ('data_b', POINTER(c_int)), ('data_c', POINTER(c_int)),
('len_a', c_int), ('len_b', c_int), ('len_c', c_int)]
# test data
data_c = [1] * 361
data_b = [1] * 361
data_a = [1] * 180
data_a.append(10)
paarray = (c_int * len(data_a))(*data_a)
pbarray = (c_int * len(data_b))(*data_b)
pcarray = (c_int * len(data_c))(*data_c)
data_value = ValueData(paarray, pbarray, pcarray, len(data_a), len(data_b), len(data_c))
```
调用计算函数:
```
#python
result = c_int()
prob = c_float()
ret_code = metis_lib.value_predict(handle, byref(data_value), byref(result), byref(prob))
if ret_code != 0:
print "value_predict error code = %d" % ret_code
print result, prob
```
* 传入参数C结构体
```
typedef struct {
int* data_a;
int* data_b;
int* data_c;
int len_a;
int len_b;
int len_c;
} ValueData;
```
* 传入参数说明:
| 名称 | 类型 |必填| 默认值 | 说明 |
| --- | --- | --- |---- | --- |
| handle| int| 是| 无|模型句柄由load_model返回|
| data_value| ValueData| 是| 无|待检测数据|
* 返回参数:
```
ret_code
result
prob
```
* 返回参数说明:
| 名称 | 类型 | 说明 |
|---|---|---|
| ret_code | int | 返回码。0:成功非0:失败 |
| result | c_int | 检测结果是否异常。0:异常1:正常 |
| prob | c_float | 概率值值越小判定为异常的置信度越高目前prob<0.15判决为异常 |
##### 2、率值检测
* 功能说明:根据参考数据检测最近一个数据点是否异常
* 调用方法:
加载so库
```
# Python
so = cdll.LoadLibrary
metis_lib = so("./libdetect.so")
```
构造传入数据:
```
# Python
from ctypes import *
class RateData(Structure):
_fields_ = [('data_a', POINTER(c_double)), ('data_b', POINTER(c_double)), ('data_c', POINTER(c_double)),
('len_a', c_int), ('len_b', c_int), ('len_c', c_int)]
# test data
data_c = [1.0] * 361
data_b = [1.0] * 361
data_a = [1.0] * 180
data_a.append(0.9)
paarray = (c_double * len(data_a))(*data_a)
pbarray = (c_double * len(data_b))(*data_b)
pcarray = (c_double * len(data_c))(*data_c)
data_value = RateData(paarray, pbarray, pcarray, len(data_a), len(data_b), len(data_c))
```
调用计算函数:
```
#python
result = c_int()
prob = c_float()
ret_code = metis_lib.rate_predict(byref(data_value), byref(result), byref(prob))
if ret_code != 0:
print "rate_predict error code = %d" % ret_code
print result, prob
```
* 传入参数C结构体
```
typedef struct {
double* data_a;
double* data_b;
double* data_c;
int len_a;
int len_b;
int len_c;
} RateData;
```
* 传入参数说明:
| 名称 | 类型 |必填| 默认值 | 说明 |
| --- | --- | --- |---- | --- |
| data_value| RateData| 是| 无|待检测数据|
* 返回参数:
```
ret_code
result
prob
```
* 返回参数说明:
| 名称 | 类型 | 说明 |
|---|---|---|
| ret_code | int | 返回码。0:成功非0:失败 |
| result | c_int | 检测结果是否异常。0:异常1:正常 |
| prob | c_float | 概率值,值越小,判定为异常的置信度越高 |
#### C代码中调用:
在C中调用检测函数需要include头文件detect.h在编译时链接libdetect.so文件
##### 1、量值检测
* 功能说明:根据参考数据检测最近一个数据点是否异常
* 调用方法:
调用load_model加载模型然后调用value_predict进行预测
```
#include "detect.h"
if (NULL == (handle = load_model("./xgb_default_model")))
{
printf("load model error\n");
return 0;
}
int ret = value_predict(handle, &value_data, &sample_result, &prob);
printf ("ret=%d result = %d prob = %f\n", ret, sample_result, prob);
```
* 传入参数C结构体
```
typedef struct {
int* data_a;
int* data_b;
int* data_c;
int len_a;
int len_b;
int len_c;
} ValueData;
```
* 传入参数说明:
| 名称 | 类型 |必填| 默认值 | 说明 |
| --- | --- | --- |---- | --- |
| handle| int| 是| 无|模型句柄由load_model返回|
| value_data| ValueData| 是| 无|待检测数据|
* 返回参数:
```
ret
sample_result
prob
```
* 返回参数说明:
| 名称 | 类型 | 说明 |
|---|---|---|
| ret | int | 返回码。0:成功非0:失败 |
| sample_result | c_int | 检测结果是否异常。0:异常1:正常 |
| prob | c_float | 概率值值越小判定为异常的置信度越高目前prob<0.15判决为异常 |
##### 2、率值检测
* 功能说明:根据参考数据检测最近一个数据点是否异常
* 调用方法:
```
#include "detect.h"
float prob;
int sample_result;
int ret = rate_predict(&rate_data, &sample_result, &prob);
printf ("ret=%d result =%d prob = %f \n", ret, sample_result, prob);
```
* 传入参数C结构体
```
typedef struct {
double* data_a;
double* data_b;
double* data_c;
int len_a;
int len_b;
int len_c;
} RateData;
```
* 传入参数说明:
| 名称 | 类型 |必填| 默认值 | 说明 |
| --- | --- | --- |---- | --- |
| rate_data| RateData| 是| 无|待检测数据|
* 返回参数:
```
ret
sample_result
prob
```
* 返回参数说明:
| 名称 | 类型 | 说明 |
|---|---|---|
| ret | int | 返回码。0:成功非0:失败 |
| result | c_int | 检测结果是否异常。0:异常1:正常 |
| prob | c_float | 概率值,值越小,判定为异常的置信度越高 |
| p | string | 概率值,值越小,判定为异常的置信度越高 |

View File

@ -1,25 +1,21 @@
## 时间序列异常检测学件的架构
## 时间序列异常检测学件的架构
![code_arch.png](images/code_arch.png)
时间序列异常检测工程的整体分层,可以分为以下五层:
时间序列异常检测学件的整体分层设计，可以分为以下四层：
1. **数据层DB**:存储检测异常信息、样本信息、任务信息等
1. **数据层DB**:存储检测异常信息、样本信息、任务信息等
2. **服务层(SERVICE)** 服务层划分为两大模块
2. **服务层(server)** 服务层划分为四大模块
1. **数据驱动模块DAO** 封装了和DB层常见的数据操作接口。
1. **数据驱动模块DAO** 封装了和DB层常见的数据操作接口。
2. **业务模块service** 完成API层的具体业务逻辑
2. **特征计算模块feature** 提供三类时间序列的特征(统计特征、拟合特征、分类特征)用于对时序数据进行特征提取,在监督学习和训练中使用
3. **学件层(LEARNWARE)**:学件层划分为三大模块
1. **检测模块detect** 提供时间序列异常检测接口
3. **算法模块algorithm** 提供常见的几种机器学习算法封装（统计判别算法、指数移动平均算法、多项式算法、GBDT和xgboost等），用于对时序数据进行联合仲裁检测。
2. **特征计算模块features** 提供三类时间序列的特征(统计特征、拟合特征、分类特征)用于对时序数据进行特征提取,在监督学习和训练中使用
4. **业务模块business** 业务模块是基于原子接口封装完成API层的具体业务逻辑。
3. **算法模块algorithm** 提供常见的几种机器学习算法封装（统计判别算法、指数移动平均算法、多项式算法、GBDT和xgboost等），用于对时序数据进行联合仲裁检测。
4. **接口层(API)** 提供API能力时间序列异常检测接口和WEB管理的操作接口。
3. **接口层(api)** 提供API能力时间序列异常检测接口和WEB管理的操作接口。
5. **WEB层(WEB)** 系统提供的WEB服务通过服务界面用户可以进行异常查询、打标标注、样本库管理、模型训练等操作。
4. **WEB层(web)** 系统提供的WEB服务通过服务界面用户可以进行异常查询、打标标注、样本库管理、模型训练等操作。

View File

@ -1,67 +1,66 @@
## 项目目录结构
## 项目目录结构
项目开发的目录结构保持一致,容易理解并方便管理。
项目开发的目录结构保持一致,容易理解并方便管理。
## 目录结构
## 目录结构
- `/app/` 服务端工作目录
- `/app/` 服务端工作目录
`/app/controller/` 路由入口Action层
`/app/controller/` 路由入口Action层
`/app/common/` 存放公共函数和常量定义
`/app/config/` 业务配置层
`/app/dao/` 数据库表实例层
`/app/dao/` 数据库表实例层
`/app/service/` 业务逻辑层
`/app/model/` 模型文件存放目录
- `/uweb/` 管理端工作目录
`/app/service/` 业务逻辑层
`/uweb/custom/` WEB端所需静态文件目录
`/app/service/algorithm/` 算法层
`/uweb/lib/` WEB端框架目录
`/app/service/feature/` 特征层
`/uweb/src/` WEB端开发目录
`/app/utils/` 存放公共函数
`/uweb/src/pages/` WEB端所有页面的目录
- `/uweb/` 管理端总工作目录
`/uweb/src/plugins/` WEB端自定义插件目录
`/uweb/custom/` WEB端所需静态文件目录
`/uweb/src/app.json` WEB端配置文件
`/uweb/lib/` WEB端框架目录
`/uweb/src/app.less` WEB端全局样式文件
`/uweb/src/` WEB端开发目录
`/uweb/dist/` WEB端打包后的静态文件目录
`/uweb/src/pages/` WEB端所有页面的目录
- `/time_series_detector/` 时间序列异常检测学件目录
`/uweb/src/plugins/` WEB端自定义插件目录
`/time_series_detector/model/` 模型文件存放目录
`/time_series_detector/algorithm/` 算法层
`/uweb/src/app.json` WEB端配置文件
`/time_series_detector/feature/` 特征层
项目中支持以下类型的文件:
1. `.json`: 配置文件
2. `.uwx`: UWEB 视图文件
3. `.uw`: UWEB 逻辑脚本
4. `.js`: 普通 JavaScript 逻辑脚本
5. `.ts`: 普通 TypeScript 逻辑脚本
6. `.less`: Less 样式文件
7. `.css`: CSS 样式文件
8. `.jsx`: 开发自定义插件时可使用的 JavaScript React 脚本文件
9. `.tsx`: 开发自定义插件时可使用的 TypeScript React 脚本文件
10. `.png`、`.jpg`、`.gif`、`.svg`: 图片文件
`/uweb/src/app.less` WEB端全局样式文件
- `/docs/` 项目文档存放目录
`/uweb/dist/` WEB端打包后的静态文件目录
项目中支持以下类型的文件:
1. `.json`: 配置文件
2. `.uwx`: UWEB 视图文件
3. `.uw`: UWEB 逻辑脚本
4. `.js`: 普通 JavaScript 逻辑脚本
5. `.ts`: 普通 TypeScript 逻辑脚本
6. `.less`: Less 样式文件
7. `.css`: CSS 样式文件
8. `.jsx`: 开发自定义插件时可使用的 JavaScript React 脚本文件
9. `.tsx`: 开发自定义插件时可使用的 TypeScript React 脚本文件
10. `.png`、`.jpg`、`.gif`、`.svg`: 图片文件
- `/docs/` 项目文档存放目录
## 调用关系
## 调用关系
`uweb` 为管理端工作目录,可调用服务端接口
`/app/controller/` 为服务端路由入口可调用service业务层
`/app/controller/` 为服务端路由入口可调用service业务层
`/app/service/` 为service业务层可调用私有对象dao数据库
`/app/service/` 为service业务层可调用私有对象dao数据库层和time_series_detector学件接口
`/time_series_detector/` 学件目录供service业务层调用
`/app/model/` 模型文件存放目录供service业务层加载
`/app/utils/` 公共函数层全局可调用

Binary file not shown.

Before

Width:  |  Height:  |  Size: 70 KiB

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.9 KiB

View File

@ -68,15 +68,15 @@ mysql -umetis -pmetis@123 -h127.0.0.1 metis < /data/Metis/app/sql/time_series_de
4、将数据库配置信息更新到服务端配置文件`database.py`
```
vim /data/Metis/app/dao/db_common/database.py
vim /data/Metis/app/config/database.py
```
改写配置
```
DB = 'metis'
USER = 'metis'
PASSWD = 'metis@123'
HOST = '127.0.0.1'
PORT = 3306
db = 'metis'
user = 'metis'
passwd = 'metis@123'
host = '127.0.0.1'
port = 3306
```
## 1.3. <a id="chapter-1-3"></a>服务端环境安装
@ -111,7 +111,7 @@ export PYTHONPATH=/data/Metis:$PYTHONPATH
### 1.3.4. 启动服务端
启动服务端程序ip请替换为服务器真实ip地址
启动服务端程序
```
python /data/Metis/app/controller/manage.py runserver {ip}:{port}
@ -137,7 +137,7 @@ python /data/Metis/app/controller/manage.py runserver {ip}:{port}
运行npm run build
将uweb目录下的custom文件夹复制到uweb目录下生成的dist文件夹中
将uweb目录下的custom文件夹复制到uweb目录下生成的dist文件夹中
将nginx配置文件中的root定位到uweb目录下的dist文件夹
@ -175,8 +175,6 @@ nginx正常启动后打开浏览器并访问 `http://${ip}:80/`
npm run build 项目代码开发完成后,执行该命令打包项目代码。在项目根目录会生成一个 dist 目录然后复制custom目录放至dist目录下。发布时将 dist 目录中的全部文件作为静态文件,放至服务器指定的静态文件目录即可
安装完成后请参考API使用说明进行API调用
# 2. <a id="chapter-5"></a>docker安装部署
## 2.1. 安装docker
@ -197,7 +195,4 @@ docker ps
```
查看三个容器metis-db、metis-web、metis-svr启动状态如正常启动则安装成功。
![docker_ps](images/docker_ps.png)
如安装成功,可以通过浏览器直接访问: `http://${IP}`
注意Metis依赖80和8080端口腾讯云服务器默认开通了80但没有开通8080的外网访问权限需要手动在安全组中增加对8080端口的放通。
请参考API使用说明进行API调用
如安装成功,可以通过浏览器直接访问: `http://${IP}`

View File

@ -1 +0,0 @@
# Names exported by a wildcard import of this module
# (presumably a package __init__.py listing its submodules - confirm).
__all__ = ["fixtures", "test_feature"]

View File

@ -9,7 +9,8 @@ Unless required by applicable law or agreed to in writing, software distributed
"""
from tests.fixtures import DataTestCase
from time_series_detector.feature.statistical_features import *
from app.service.time_series_detector.feature.statistical_features import *
class FeatureTestCase(DataTestCase):

View File

@ -1 +0,0 @@
# Names exported by a wildcard import of this module
# (presumably a package __init__.py listing its submodules - confirm).
__all__ = ["algorithm", "feature", "common", "detect"]

View File

@ -1 +0,0 @@
# Names exported by a wildcard import of this module
# (presumably a package __init__.py listing its submodules - confirm).
__all__ = ["tsd_common", "tsd_errorcode"]

View File

@ -1,29 +0,0 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""
# Numeric return codes of the time-series detector.
TSD_OP_SUCCESS = 0
TSD_THROW_EXP = 1000
TSD_CHECK_PARAM_FAILED = 1002
TSD_FILE_FORMAT_ERR = 1003
TSD_CAL_FEATURE_ERR = 2001
TSD_READ_FEATURE_FAILED = 2002
TSD_TRAIN_ERR = 2003
TSD_LACK_SAMPLE = 2004
# Message for each return code. The Chinese strings are runtime values;
# the trailing comments give their English meaning.
ERR_CODE = {
TSD_OP_SUCCESS: "操作成功",  # operation succeeded
TSD_THROW_EXP: "抛出异常",  # an exception was thrown
TSD_CHECK_PARAM_FAILED: "参数检查失败",  # parameter check failed
TSD_FILE_FORMAT_ERR: "文件格式有误",  # file format is wrong
TSD_CAL_FEATURE_ERR: "特征计算出错",  # feature calculation error
TSD_READ_FEATURE_FAILED: "读取特征数据失败",  # failed to read feature data
TSD_TRAIN_ERR: "训练出错",  # training error
TSD_LACK_SAMPLE: "缺少正样本或负样本"  # positive or negative samples are missing
}

View File

@ -1,124 +0,0 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""
import os
from time_series_detector.algorithm import isolation_forest, ewma, polynomial_interpolation, statistic, xgboosting
from time_series_detector.common.tsd_errorcode import *
from time_series_detector.common.tsd_common import *
# Directory prefix (with trailing slash) of the model files, next to this module.
MODEL_PATH = os.path.join(os.path.dirname(__file__), './model/')


class Detect(object):
    """
    Entry point of the time series detector.

    Holds one instance of every algorithm wrapper and exposes
    ``value_predict`` and ``rate_predict``.
    """

    def __init__(self):
        self.iforest_obj = isolation_forest.IForest()
        self.ewma_obj = ewma.Ewma()
        self.polynomial_obj = polynomial_interpolation.PolynomialInterpolation()
        self.statistic_obj = statistic.Statistic()
        self.supervised_obj = xgboosting.XGBoosting()

    def __list_is_digit(self, data):
        """
        Check that every item of data can be parsed by ``float``.

        :param data: iterable of strings
        :return: True when all items parse (also for an empty iterable), else False
        """
        for index in data:
            try:
                float(index)
            except ValueError:
                return False
        return True

    def __get_window(self, data):
        """Return data["window"] when present, DEFAULT_WINDOW otherwise."""
        return data["window"] if "window" in data else DEFAULT_WINDOW

    def __check_param(self, data):
        """
        Validate the request dictionary used by ``value_predict``.

        Checks, in this order: all three series are present, none is empty,
        every comma separated item is numeric, and the item counts match
        the window (2 * window + 1 for dataC/dataB, window + 1 for dataA).

        :param data: Dictionary-like object with 'dataC', 'dataB', 'dataA' and optionally 'window'
        :return: (TSD_OP_SUCCESS, "") or (TSD_CHECK_PARAM_FAILED, reason)
        """
        for key in ("dataC", "dataB", "dataA"):
            if key not in data:
                return TSD_CHECK_PARAM_FAILED, "missing parameter"
        for key in ("dataA", "dataB", "dataC"):
            if not data[key]:
                return TSD_CHECK_PARAM_FAILED, "%s can not be empty" % key
        for key in ("dataA", "dataB", "dataC"):
            if not self.__list_is_digit(data[key].split(',')):
                return TSD_CHECK_PARAM_FAILED, "%s contains illegal numbers" % key
        window = self.__get_window(data)
        expected_lengths = (
            ("dataC", 2 * window + 1),
            ("dataB", 2 * window + 1),
            ("dataA", window + 1),
        )
        for key, expected in expected_lengths:
            if len(data[key].split(',')) != expected:
                return TSD_CHECK_PARAM_FAILED, "%s length does not match" % key
        return TSD_OP_SUCCESS, ""

    def value_predict(self, data):
        """
        Predict if the latest value is an outlier or not.

        The statistic, EWMA and polynomial detectors run first. Only when at
        least one of them returns 0 is the supervised model asked for the
        final verdict; otherwise the point is reported as normal with p = 1.

        :param data: The attributes are:
                    'window', the length of window,
                    'taskId', the id of detect model,
                    'dataC', a piece of data to learn,
                    'dataB', a piece of data to learn,
                    'dataA', a piece of data to learn and the latest value to be detected.
        :type data: Dictionary-like object
        :return: (return code, payload). On success the payload attributes are:
                    'p', the class probability (as a string),
                    'ret', the result of detect(1 denotes normal, 0 denotes abnormal).
                 On a failed parameter check the payload is the reason text.
        """
        ret_code, ret_data = self.__check_param(data)
        if ret_code != TSD_OP_SUCCESS:
            return ret_code, ret_data
        # NOTE(review): taskId is concatenated into a file path unchecked;
        # confirm that callers only pass trusted ids.
        if "taskId" in data and data["taskId"]:
            model_name = MODEL_PATH + data["taskId"] + "_model"
        else:
            model_name = MODEL_PATH + "xgb_default_model"
        combined_data = data["dataC"] + "," + data["dataB"] + "," + data["dataA"]
        # Materialise the series: it is handed to several detectors, and a
        # Python 3 ``map`` object would be exhausted after the first one.
        time_series = list(map(int, combined_data.split(',')))
        window = self.__get_window(data)
        statistic_result = self.statistic_obj.predict(time_series)
        ewma_result = self.ewma_obj.predict(time_series)
        polynomial_result = self.polynomial_obj.predict(time_series, window)
        if statistic_result == 0 or ewma_result == 0 or polynomial_result == 0:
            xgb_result = self.supervised_obj.predict(time_series, window, model_name)
            res_value = xgb_result[0]
            prob = xgb_result[1]
        else:
            res_value = 1
            prob = 1
        ret_data = {"ret": res_value, "p": str(prob)}
        return TSD_OP_SUCCESS, ret_data

    def rate_predict(self, data):
        """
        Predict if the latest value is an outlier or not.

        Only the statistic detector is used. The input is not validated
        here, so a missing key or a non-numeric item raises.

        :param data: The attributes are:
                    'dataC', a piece of data to learn,
                    'dataB', a piece of data to learn,
                    'dataA', a piece of data to learn and the latest value to be detected.
        :type data: Dictionary-like object
        :return: (TSD_OP_SUCCESS, payload). The payload attributes are:
                    'p', the class probability ("0" when abnormal, "1" otherwise),
                    'ret', the result of detect(1 denotes normal, 0 denotes abnormal).
        """
        combined_data = data["dataC"] + "," + data["dataB"] + "," + data["dataA"]
        # list(...) keeps the series reusable under Python 3 as well.
        time_series = list(map(float, combined_data.split(',')))
        statistic_result = self.statistic_obj.predict(time_series)
        prob = 0 if statistic_result == 0 else 1
        ret_data = {"ret": statistic_result, "p": str(prob)}
        return TSD_OP_SUCCESS, ret_data

View File

@ -1,211 +0,0 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""
import numpy as np
import tsfresh.feature_extraction.feature_calculators as ts_feature_calculators
from time_series_detector.common.tsd_common import DEFAULT_WINDOW, split_time_series
from statistical_features import time_series_mean, time_series_variance, time_series_standard_deviation, time_series_median
def time_series_autocorrelation(x):
    """
    Autocorrelation of x at a lag derived from its length.

    The lag is ``int((len(x) - 3) / 5)`` and the value itself comes from
    tsfresh's ``autocorrelation`` feature calculator. A series whose
    standard deviation is below 1e-10 yields 0 without calling tsfresh.

    .. rubric:: References

    [1] https://en.wikipedia.org/wiki/Autocorrelation#Estimation

    :param x: the time series to calculate the feature of
    :return: the value of this feature
    :return type: float
    """
    shift = int((len(x) - 3) / 5)
    deviation = np.sqrt(np.var(x))
    if deviation < 1e-10:
        # Effectively constant series.
        return 0
    return ts_feature_calculators.autocorrelation(x, shift)
def time_series_coefficient_of_variation(x):
    """
    Ratio of the mean of x to its standard deviation (square root of the variance).

    Returns 0 when the standard deviation is below 1e-10.

    :param x: the time series to calculate the feature of
    :return: the value of this feature
    :return type: float
    """
    deviation = np.sqrt(np.var(x))
    if deviation < 1e-10:
        return 0
    return np.mean(x) / deviation
def time_series_binned_entropy(x):
    """
    Binned entropy of x for several bin counts.

    For each bin count in [2, 4, 6, 8, 10, 20] the value is computed by
    tsfresh's ``binned_entropy`` feature calculator.

    :param x: the time series to calculate the feature of
    :return: one value per bin count, in the order listed above
    :return type: list
    """
    bin_counts = [2, 4, 6, 8, 10, 20]
    return [ts_feature_calculators.binned_entropy(x, bins) for bins in bin_counts]
def time_series_value_distribution(x):
    """
    Share of the elements of x that fall into each of 14 fixed buckets.

    The bucket edges run from 0 to 1.0; the edge 1.0 is listed twice, which
    gives the value 1.0 a bucket of its own. Counts are divided by len(x).

    :param x: normalized time series
    :return: the values of this feature
    :return type: list
    """
    edges = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1.0, 1.0]
    counts, _ = np.histogram(x, bins=edges)
    total = float(len(x))
    return list(counts / total)
def time_series_daily_parts_value_distribution(x):
    """
    Bucket distribution of the three subsequences c, b and a of x.

    x is split with split_time_series (window = DEFAULT_WINDOW); the left
    and right windows of c and of b are joined again, dropping the first
    element of the right window. For each subsequence the share of elements
    per bucket is computed.

    :param x: normalized time series (a list, the windows are joined with ``+``)
    :return: the bucket shares of c, then b, then a
    :return type: list
    """
    edges = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1.0, 1.0]
    windows = split_time_series(x, DEFAULT_WINDOW)
    daily_parts = [
        windows[0] + windows[1][1:],   # c
        windows[2] + windows[3][1:],   # b
        windows[4],                    # a
    ]
    histograms = [list(np.histogram(part, bins=edges)[0]) for part in daily_parts]
    features = []
    for counts, part in zip(histograms, daily_parts):
        features += list(np.array(counts) / float(len(part)))
    return features
def time_series_daily_parts_value_distribution_with_threshold(x):
    """
    Describe where the values below 0.01 sit among the parts c, b and a.

    The first three values give, per part, its share of all below-threshold
    elements (0 for each part when there are none). The last three values
    give, per part, the fraction of its own elements that are below the
    threshold.

    :param x: normalized time series (a list, the windows are joined with ``+``)
    :return: 6 values of this feature
    :return type: list
    """
    threshold = 0.01
    windows = split_time_series(x, DEFAULT_WINDOW)
    daily_parts = [
        windows[0] + windows[1][1:],   # c
        windows[2] + windows[3][1:],   # b
        windows[4],                    # a
    ]
    # number of elements below the threshold, per part
    below_counts = [(np.array(part) < threshold).sum() for part in daily_parts]
    total_below = below_counts[0] + below_counts[1] + below_counts[2]
    if total_below == 0:
        features = [0, 0, 0]
    else:
        features = [count / float(total_below) for count in below_counts]
    features.extend(
        [count / float(len(part)) for count, part in zip(below_counts, daily_parts)]
    )
    return features
def time_series_window_parts_value_distribution_with_threshold(x):
    """
    Per-window count of values below 0.01, divided by DEFAULT_WINDOW + 1.

    x is split into five windows with split_time_series. When no window
    holds a value below the threshold, five integer zeros are returned.

    :param x: normalized time series
    :return: 5 values of this feature
    :return type: list
    """
    threshold = 0.01
    below_counts = [
        (np.array(part) < threshold).sum()
        for part in split_time_series(x, DEFAULT_WINDOW)
    ]
    if sum(below_counts) == 0:
        return [0, 0, 0, 0, 0]
    return list(np.array(below_counts) / float((DEFAULT_WINDOW + 1)))
# add your own classification features here...
def get_classification_features(x):
    """
    Collect all classification features of x into one flat list.

    The six scalar features come first, followed by the list-valued features
    in the order given below. The order of the result is fixed by this
    function.

    :param x: the time series to calculate the features of
    :return: the classification features
    :return type: list
    """
    features = [
        time_series_mean(x),
        time_series_variance(x),
        time_series_standard_deviation(x),
        time_series_median(x),
        time_series_autocorrelation(x),
        time_series_coefficient_of_variation(x),
    ]
    list_valued_extractors = (
        time_series_value_distribution,
        time_series_daily_parts_value_distribution,
        time_series_daily_parts_value_distribution_with_threshold,
        time_series_window_parts_value_distribution_with_threshold,
        time_series_binned_entropy,
        # add your own classification features here...
    )
    for extractor in list_valued_extractors:
        features.extend(extractor(x))
    return features

View File

@ -1,79 +0,0 @@
/*
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _DETECT_H
#define _DETECT_H
#include <inttypes.h>
#ifdef __cplusplus
extern "C"{
#endif
/*
 * Input of value_predict(): three integer series and their lengths.
 * NOTE(review): by analogy with the dataA/dataB/dataC request fields, data_a
 * presumably holds the window ending at the point under test and data_b /
 * data_c the reference windows - confirm against the implementation.
 */
typedef struct {
int* data_a;
int* data_b;
int* data_c;
int len_a; /* number of elements in data_a */
int len_b; /* number of elements in data_b */
int len_c; /* number of elements in data_c */
} ValueData;
/*
 * Input of rate_predict(): same layout as ValueData, with double-valued series.
 */
typedef struct {
double* data_a;
double* data_b;
double* data_c;
int len_a; /* number of elements in data_a */
int len_b; /* number of elements in data_b */
int len_c; /* number of elements in data_c */
} RateData;
/* Status codes; the predict functions are documented to return 0 on success and <0 on failure. */
enum TSD_ERR_CODE
{
TSD_SUCCESS = 0,
TSD_INVALID_HANDLER = -1,
TSD_CHECK_PARAM_FAILED = -2,
TSD_TIMESERIES_INIT_ERROR = -3
};
/*
 * Values written to sample_result.
 * NOTE(review): the predict functions document 1 as normal and 0 as abnormal,
 * so TSD_NEGATIVE presumably means "abnormal" - confirm.
 */
enum TSD_SAMPLE_RESULT
{
TSD_NEGATIVE = 0,
TSD_POSITIVE = 1
};
/*!
 * \brief Load an xgb model from an xgb file.
 * \param fname xgb file path and name
 * \return handle when success, NULL when failure happens
 */
void * load_model(const char *fname);
/*!
 * \brief Predict if the latest value is an outlier or not.
 * \param mhandle the handle of the xgb model (as returned by load_model)
 * \param data the input data
 * \param sample_result output: 1 denotes normal, 0 denotes abnormal
 * \param prob output: presumably the probability behind sample_result - confirm
 * \return 0 when success, <0 when failure happens
 */
int value_predict(void * mhandle, ValueData* data, int* sample_result, float* prob);
/*!
 * \brief Predict if the latest value is an outlier or not.
 *
 * Unlike value_predict, this function takes no model handle.
 * \param data the input data
 * \param sample_result output: 1 denotes normal, 0 denotes abnormal
 * \param prob output: presumably the probability behind sample_result - confirm
 * \return 0 when success, <0 when failure happens
 */
int rate_predict(RateData* data, int* sample_result, float* prob);
#ifdef __cplusplus
}
#endif
#endif