Mirror of https://github.com/Tencent/Metis.git (synced 2025-12-26 04:02:48 +00:00)

Compare commits: 22 commits
| Author | SHA1 | Date |
|---|---|---|
| | bf50841faf | |
| | 0a49a9b1ee | |
| | 8364184c46 | |
| | 8cbdc6dc93 | |
| | 5490207e81 | |
| | 1a8eab9ec5 | |
| | e21a00bcd4 | |
| | 6e8344de95 | |
| | 6945e32cc5 | |
| | 024957e32d | |
| | 3c46aa7282 | |
| | a9348864e0 | |
| | eb9752200e | |
| | 7c42a3ccae | |
| | 13802fd1f9 | |
| | e3167d25c9 | |
| | 86569e65bc | |
| | 48618f59cb | |
| | c739f92ca6 | |
| | 86076d843f | |
| | 21b2b1614d | |
| | 5faf04cf06 | |
@@ -3,7 +3,7 @@

[](https://github.com/tencent/Metis/master/LICENSE.TXT)
[](https://github.com/tencent/Metis/releases)
[](https://github.com/tencent/Metis/releases)
[](https://github.com/tencent/Metis/pulls)

The name **Metis** is taken from Metis, the Greek goddess of wisdom. Metis is a collection of application practices in the AIOps field; it mainly addresses intelligent operations problems of quality, efficiency, and cost. The time series anomaly detection learnware open-sourced in the current version tackles anomaly detection for time series data from a machine learning perspective.
@@ -3,7 +3,7 @@

[](https://github.com/tencent/Metis/master/LICENSE.TXT)
[](https://github.com/tencent/Metis/releases)
[](https://github.com/tencent/Metis/releases)
[](https://github.com/tencent/Metis/pulls)

The name **Metis** is taken from Metis, the Greek goddess of wisdom. Metis is a collection of application practices in the AIOps field; it mainly addresses intelligent operations problems of quality, efficiency, and cost. The time series anomaly detection learnware open-sourced in the current version tackles anomaly detection for time series data from a machine learning perspective.
@@ -1 +1 @@

__all__ = ["config", "controller", "dao", "model", "service", "utils"]
__all__ = ["common", "dao", "service"]
@@ -0,0 +1 @@

__all__ = ["common", "errorcode"]
@@ -8,6 +8,10 @@ https://opensource.org/licenses/BSD-3-Clause

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""

import traceback
from functools import wraps
from errorcode import *

DEFAULT_WINDOW = 180
INPUT_LEN_ENG_MAX = 32
INPUT_LEN_CH_MAX = 64
@@ -17,3 +21,20 @@ VALUE_LEN_MAX = 50000

UPLOAD_FILE = '/tmp/tmpfile_%s.csv'
MARK_POSITIVE = 1
MARK_NEGATIVE = 2


def build_ret_data(ret_code, data=""):
    return {"code": ret_code, "msg": ERR_CODE[ret_code], "data": data}


def exce_service(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            ret_code, ret_data = func(*args, **kwargs)
            return_dict = build_ret_data(ret_code, ret_data)
        except Exception as ex:
            traceback.print_exc()
            return_dict = build_ret_data(THROW_EXP, str(ex))
        return return_dict
    return wrapper
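A minimal usage sketch of the `exce_service` decorator above; the service body is illustrative, and `OP_SUCCESS` is assumed to be exported by `errorcode`:

```
# Python (illustrative sketch)
@exce_service
def query_anomaly(anomaly_id):
    # A real service would call into the DAO layer here.
    return OP_SUCCESS, {"id": anomaly_id}

# query_anomaly(1) -> {"code": OP_SUCCESS, "msg": ERR_CODE[OP_SUCCESS], "data": {"id": 1}};
# any exception raised inside the body becomes a THROW_EXP response instead.
```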
@@ -32,4 +32,4 @@ ERR_CODE = {

    READ_FEATURE_FAILED: "读取特征数据失败",
    TRAIN_ERR: "训练出错",
    LACK_SAMPLE: "缺少正样本或负样本"
}
@@ -1 +0,0 @@

__all__ = ["database", "common", "errorcode"]
@@ -1,16 +1,16 @@

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import json
from functools import wraps
from django.shortcuts import render
from django.http import FileResponse
from common.render import render_json
from functools import wraps
from render import render_json
from app.service.time_series_detector.anomaly_service import *
from app.service.time_series_detector.sample_service import *
from app.service.time_series_detector.task_service import *
from app.service.time_series_detector.detect_service import *
from app.config.errorcode import *
from app.utils.utils import *
from app.common.errorcode import *
from app.common.common import *


def check_post(func):
@@ -1 +1 @@

__all__ = ["time_series_detector"]
__all__ = ["db_common", "time_series_detector"]
@@ -0,0 +1 @@

__all__ = ["database"]
@@ -1 +1 @@

__all__ = ["anomaly_op", "sample_op", "train_op"]
@@ -8,13 +8,11 @@ https://opensource.org/licenses/BSD-3-Clause

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""

import time
import datetime
import MySQLdb
from app.config import database
from app.dao.db_common import database
from app.dao.time_series_detector.sample_op import *
from app.config.common import *
from app.config.errorcode import *
from app.common.common import *
from app.common.errorcode import *


class AbnormalOperation(object):
@@ -13,9 +13,9 @@ import uuid

import csv
import codecs
import MySQLdb
from app.config import database
from app.config.common import *
from app.config.errorcode import *
from app.dao.db_common import database
from app.common.common import *
from app.common.errorcode import *


class SampleOperation(object):
@@ -9,9 +9,9 @@ Unless required by applicable law or agreed to in writing, software distributed

"""

import MySQLdb
from app.config import database
from app.config.common import *
from app.config.errorcode import *
from app.dao.db_common import database
from app.common.common import *
from app.common.errorcode import *


class TrainOperation(object):
@@ -1 +0,0 @@

__all__ = ["time_series_detector"]

Binary file not shown.
@@ -1 +1 @@

__all__ = ["time_series_detector"]
@@ -1 +1 @@

__all__ = ["algorithm", "feature", "anomaly_service", "sample_service", "task_service", "detect_service"]
__all__ = ["anomaly_service", "sample_service", "task_service", "detect_service"]
@@ -10,7 +10,6 @@ Unless required by applicable law or agreed to in writing, software distributed

import json
from app.dao.time_series_detector.anomaly_op import *
from app.utils.utils import *


class AnomalyService(object):
@@ -14,11 +14,12 @@ import threading

from app.dao.time_series_detector import anomaly_op
from app.dao.time_series_detector import sample_op
from app.dao.time_series_detector import train_op
from app.utils.utils import *
from app.service.time_series_detector.algorithm import isolation_forest, ewma, polynomial_interpolation, statistic, xgboosting
from app.config.errorcode import *
from app.config.common import *
MODEL_PATH = os.path.join(os.path.dirname(__file__), '../../model/time_series_detector/')
from time_series_detector.algorithm import xgboosting
from time_series_detector import detect
from app.common.errorcode import *
from app.common.common import *
from time_series_detector.common.tsd_errorcode import *
MODEL_PATH = os.path.join(os.path.dirname(__file__), './model/')


class DetectService(object):
@@ -26,11 +27,7 @@ class DetectService(object):

    def __init__(self):
        self.sample_op_obj = sample_op.SampleOperation()
        self.anomaly_op_obj = anomaly_op.AbnormalOperation()
        self.iforest_obj = isolation_forest.IForest()
        self.ewma_obj = ewma.Ewma()
        self.polynomial_obj = polynomial_interpolation.PolynomialInterpolation()
        self.statistic_obj = statistic.Statistic()
        self.supervised_obj = xgboosting.XGBoosting()
        self.detect_obj = detect.Detect()

    def __generate_model(self, data, task_id):
        """
@@ -123,64 +120,16 @@ class DetectService(object):

        return True

    def __check_param(self, data):
        if ("viewName" not in data.keys()) or ("attrId" not in data.keys()) or ("attrName" not in data.keys()) or ("time" not in data.keys()) or ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()):
        if ("viewName" not in data.keys()) or ("viewId" not in data.keys()) or ("attrId" not in data.keys()) or ("attrName" not in data.keys()) or ("time" not in data.keys()) or ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()):
            return CHECK_PARAM_FAILED, "missing parameter"
        if not data['dataA']:
            return CHECK_PARAM_FAILED, "dataA can not be empty"
        if not data['dataB']:
            return CHECK_PARAM_FAILED, "dataB can not be empty"
        if not data['dataC']:
            return CHECK_PARAM_FAILED, "dataC can not be empty"
        if not self.__list_is_digit(data['dataA'].split(',')):
            return CHECK_PARAM_FAILED, "dataA contains illegal numbers"
        if not self.__list_is_digit(data['dataB'].split(',')):
            return CHECK_PARAM_FAILED, "dataB contains illegal numbers"
        if not self.__list_is_digit(data['dataC'].split(',')):
            return CHECK_PARAM_FAILED, "dataC contains illegal numbers"
        if "window" in data:
            window = data["window"]
        else:
            window = DEFAULT_WINDOW
        if len(data['dataC'].split(',')) != (2 * window + 1):
            return CHECK_PARAM_FAILED, "dataC length does not match"
        if len(data['dataB'].split(',')) != (2 * window + 1):
            return CHECK_PARAM_FAILED, "dataB length does not match"
        if len(data['dataA'].split(',')) != (window + 1):
            return CHECK_PARAM_FAILED, "dataA length does not match"
        return OP_SUCCESS, ""

    def value_predict(self, data):
        """
        Predict the data

        :param data: the time series to detect
        """
        ret_code, ret_data = self.__check_param(data)
        if ret_code != OP_SUCCESS:
            return build_ret_data(ret_code, ret_data)
        if "taskId" in data and data["taskId"]:
            model_name = MODEL_PATH + data["taskId"] + "_model"
        else:
            model_name = MODEL_PATH + "xgb_default_model"
        combined_data = data["dataC"] + "," + data["dataB"] + "," + data["dataA"]
        time_series = map(int, combined_data.split(','))
        if "window" in data:
            window = data["window"]
        else:
            window = DEFAULT_WINDOW
        statistic_result = self.statistic_obj.predict(time_series)
        ewma_result = self.ewma_obj.predict(time_series)
        polynomial_result = self.polynomial_obj.predict(time_series, window)
        iforest_result = self.iforest_obj.predict(time_series, window)
        if statistic_result == 0 or ewma_result == 0 or polynomial_result == 0 or iforest_result == 0:
            xgb_result = self.supervised_obj.predict(time_series, window, model_name)
            res_value = xgb_result[0]
            prob = xgb_result[1]
        else:
            res_value = 1
            prob = 1
        ret_data = {"ret": res_value, "p": str(prob)}
        if ret_data["ret"] == 0:
        ret_code, ret_data = self.detect_obj.value_predict(data)
        if ret_code == TSD_OP_SUCCESS and ret_data["ret"] == 0:
            anomaly_params = {
                "view_id": data["viewId"],
                "view_name": data["viewName"],
@@ -192,18 +141,14 @@ class DetectService(object):

                "data_a": data["dataA"]
            }
            self.anomaly_op_obj.insert_anomaly(anomaly_params)
            return build_ret_data(OP_SUCCESS, ret_data)
        return build_ret_data(ret_code, ret_data)

    def rate_predict(self, data):
        combined_data = data["dataC"] + "," + data["dataB"] + "," + data["dataA"]
        time_series = map(float, combined_data.split(','))
        statistic_result = self.statistic_obj.predict(time_series)
        if statistic_result == 0:
            prob = 0
        else:
            prob = 1
        ret_data = {"ret": statistic_result, "p": str(prob)}
        if ret_data["ret"] == 0:
        ret_code, ret_data = self.__check_param(data)
        if ret_code != OP_SUCCESS:
            return build_ret_data(ret_code, ret_data)
        ret_code, ret_data = self.detect_obj.rate_predict(data)
        if ret_code == TSD_OP_SUCCESS and ret_data["ret"] == 0:
            anomaly_params = {
                "view_id": data["viewId"],
                "view_name": data["viewName"],
@@ -1,88 +0,0 @@

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""

import numpy as np
import tsfresh.feature_extraction.feature_calculators as ts_feature_calculators


def time_series_autocorrelation(x):
    """
    Calculates the autocorrelation of the specified lag, according to the formula [1]

    .. math::

        \\frac{1}{(n-l)\sigma^{2}} \\sum_{t=1}^{n-l}(X_{t}-\\mu )(X_{t+l}-\\mu)

    where :math:`n` is the length of the time series :math:`X_i`, :math:`\sigma^2` its variance and :math:`\mu` its
    mean. `l` denotes the lag.

    .. rubric:: References

    [1] https://en.wikipedia.org/wiki/Autocorrelation#Estimation

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :param lag: the lag
    :type lag: int
    :return: the value of this feature
    :return type: float
    """
    lag = int((len(x) - 3) / 5)
    return ts_feature_calculators.autocorrelation(x, lag)


def time_series_coefficient_of_variation(x):
    """
    Calculates the coefficient of variation, mean value / square root of variation

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :return type: float
    """
    return np.mean(x) / np.sqrt(np.var(x))


def time_series_binned_entropy(x):
    """
    First bins the values of x into max_bins equidistant bins.
    Then calculates the value of

    .. math::

        - \\sum_{k=0}^{min(max\\_bins, len(x))} p_k log(p_k) \\cdot \\mathbf{1}_{(p_k > 0)}

    where :math:`p_k` is the percentage of samples in bin :math:`k`.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :param max_bins: the maximal number of bins
    :type max_bins: int
    :return: the value of this feature
    :return type: float
    """
    max_bins = [2, 4, 6, 8, 10, 20]
    result = []
    for value in max_bins:
        result.append(ts_feature_calculators.binned_entropy(x, value))
    return result


# add your own classification features here...


def get_classification_features(x):
    classification_features = [
        time_series_autocorrelation(x),
        time_series_coefficient_of_variation(x)
    ]
    classification_features.extend(time_series_binned_entropy(x))
    # append your own classification features here...

    return classification_features
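For reference, a hedged sketch of exercising this feature module from its new location under `time_series_detector/feature/` (the sample series is made up):

```
# Python (illustrative sketch)
import numpy as np
from time_series_detector.feature.statistical_features import get_classification_features

x = np.sin(np.linspace(0, 10, 903))  # made-up series
features = get_classification_features(x)
# -> [autocorrelation, coefficient of variation] followed by six binned-entropy values
```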
@@ -12,9 +12,8 @@ import json

import traceback
import csv
from app.dao.time_series_detector.sample_op import *
from app.config.errorcode import *
from app.utils.utils import *
from app.config.common import *
from app.common.errorcode import *
from app.common.common import *


class SampleService(object):
@@ -10,8 +10,8 @@ Unless required by applicable law or agreed to in writing, software distributed

import json
from app.dao.time_series_detector.train_op import *
from app.config.errorcode import *
from app.utils.utils import *
from app.common.errorcode import *
from app.common.common import *


class TrainService(object):
@@ -4,16 +4,16 @@ SET FOREIGN_KEY_CHECKS=0;

-- ----------------------------
DROP TABLE IF EXISTS `anomaly`;
CREATE TABLE `anomaly` (
  `id` bigint(20) NOT NULL AUTO_INCREMENT,
  `view_id` varchar(31) DEFAULT NULL,
  `view_name` varchar(63) DEFAULT NULL,
  `attr_id` varchar(31) DEFAULT NULL,
  `attr_name` varchar(63) DEFAULT NULL,
  `time` datetime DEFAULT NULL,
  `data_c` text NOT NULL,
  `data_b` text NOT NULL,
  `data_a` text NOT NULL,
  `mark_flag` int(1) NOT NULL DEFAULT '0',
  `id` int(10) NOT NULL AUTO_INCREMENT,
  `view_id` varchar(31) NOT NULL DEFAULT '' COMMENT '指标集id',
  `view_name` varchar(63) NOT NULL DEFAULT '' COMMENT '指标集名',
  `attr_id` varchar(31) NOT NULL DEFAULT '' COMMENT '指标id',
  `attr_name` varchar(63) NOT NULL DEFAULT '' COMMENT '指标名',
  `time` datetime DEFAULT NULL COMMENT '数据时间',
  `data_c` text,
  `data_b` text,
  `data_a` text,
  `mark_flag` tinyint(1) NOT NULL DEFAULT 0 COMMENT '0:没有打标、1:打标为正样本、2:打标为负样本',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
@@ -4,21 +4,21 @@ SET FOREIGN_KEY_CHECKS=0;

-- ----------------------------
DROP TABLE IF EXISTS `sample_dataset`;
CREATE TABLE `sample_dataset` (
  `id` bigint(10) NOT NULL AUTO_INCREMENT,
  `update_time` timestamp NULL DEFAULT NULL,
  `view_id` varchar(31) DEFAULT NULL,
  `view_name` varchar(63) DEFAULT NULL,
  `attr_name` varchar(63) DEFAULT NULL,
  `attr_id` varchar(31) DEFAULT NULL,
  `source` varchar(31) DEFAULT NULL,
  `train_or_test` varchar(31) DEFAULT NULL,
  `positive_or_negative` varchar(31) DEFAULT NULL,
  `window` int(2) DEFAULT NULL,
  `data_time` int(11) DEFAULT NULL,
  `id` int(10) NOT NULL AUTO_INCREMENT,
  `update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '样本更新时间',
  `view_id` varchar(31) NOT NULL DEFAULT '' COMMENT '指标集id',
  `view_name` varchar(63) NOT NULL DEFAULT '' COMMENT '指标名',
  `attr_name` varchar(63) NOT NULL DEFAULT '' COMMENT '指标名',
  `attr_id` varchar(31) NOT NULL DEFAULT '' COMMENT '指标id',
  `source` varchar(31) NOT NULL DEFAULT '' COMMENT '样本来源',
  `train_or_test` varchar(10) NOT NULL DEFAULT '' COMMENT 'test:测试样本、train:训练样本',
  `positive_or_negative` varchar(20) NOT NULL DEFAULT '' COMMENT 'positive:正样本、negative:负样本',
  `window` int(10) NOT NULL DEFAULT 0 COMMENT '窗口值,目前支持180',
  `data_time` int(10) DEFAULT NULL COMMENT '样本时间',
  `data_c` text,
  `data_b` text,
  `data_a` text,
  `anomaly_id` bigint(10) DEFAULT NULL,
  `anomaly_id` int(10) DEFAULT NULL COMMENT '标识从anomaly里插入的样本',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
@@ -1,25 +1,24 @@

SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
-- Table structure for `train_task`
-- ----------------------------
DROP TABLE IF EXISTS `train_task`;
CREATE TABLE `train_task` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `task_id` char(255) DEFAULT NULL,
  `sample_num` int(11) DEFAULT NULL,
  `postive_sample_num` int(11) DEFAULT NULL,
  `negative_sample_num` int(11) DEFAULT NULL,
  `window` int(2) DEFAULT NULL,
  `model_name` varchar(20) DEFAULT NULL,
  `source` varchar(255) DEFAULT NULL,
  `start_time` timestamp NULL DEFAULT NULL,
  `end_time` timestamp NULL DEFAULT NULL,
  `status` varchar(11) DEFAULT NULL,
  PRIMARY KEY (`id`),
  KEY `id` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

-- ----------------------------
-- Records of train_task
-- ----------------------------
INSERT INTO `train_task` VALUES ('1', '1535790960079', '90675', '45228', '45447', '180', 'xgb_default_model', 'Metis', '2018-09-01 16:36:00', '2018-09-01 16:45:40', 'complete');

SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
-- Table structure for `train_task`
-- ----------------------------
DROP TABLE IF EXISTS `train_task`;
CREATE TABLE `train_task` (
  `id` int(10) NOT NULL AUTO_INCREMENT,
  `task_id` varchar(20) NOT NULL DEFAULT '' COMMENT '训练任务id',
  `sample_num` int(10) NOT NULL DEFAULT 0 COMMENT '训练总样本数',
  `postive_sample_num` int(10) NOT NULL DEFAULT 0 COMMENT '训练正样本数',
  `negative_sample_num` int(10) NOT NULL DEFAULT 0 COMMENT '训练负样本数',
  `window` int(10) NOT NULL DEFAULT 0 COMMENT '窗口值,目前支持180',
  `model_name` varchar(20) NOT NULL DEFAULT '' COMMENT '模型名',
  `source` varchar(255) NOT NULL DEFAULT '' COMMENT '样本来源',
  `start_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '训练任务开始时间',
  `end_time` timestamp NULL DEFAULT NULL COMMENT '训练任务结束时间',
  `status` varchar(11) NOT NULL DEFAULT '' COMMENT 'complete:任务完成、running:任务正在运行、failed:任务失败',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

-- ----------------------------
-- Records of train_task
-- ----------------------------
INSERT INTO `train_task` VALUES ('1', '1535790960079', '90675', '45228', '45447', '180', 'xgb_default_model', 'Metis', '2018-09-01 16:36:00', '2018-09-01 16:45:40', 'complete');
@@ -1 +0,0 @@

__all__ = ["utils"]
@@ -0,0 +1,26 @@

# ChangeLog

## [2018-11-07, Version v0.2.0](https://github.com/Tencent/Metis/releases/tag/v0.2.0)

### Bug fixes

- [[```48618f59```](https://github.com/Tencent/Metis/commit/48618f59cb70249cba2d01d5413cbb4eea418721)] __-__ __docker__: start.sh dos2unix (lxd1190)
- [[```86076d84```](https://github.com/Tencent/Metis/commit/86076d843fab582c46728a7916a81aef7f1b78c3)] __-__ __docker__: update docker image (lxd1190)

### Code Refactoring

- [[```21b2b161```](https://github.com/Tencent/Metis/commit/21b2b1614d91eec1fc2fb07c6628f6a3868523e7)] __-__ __app__: refactor dictionary: add app module (lxd1190)
- [[```5faf04cf```](https://github.com/Tencent/Metis/commit/5faf04cf06643f7c9f3833daba7c81a31c028eef)] __-__ __app__: refactor dictionary (lxd1190)

### Other commits

- [[```e3167d25```](https://github.com/Tencent/Metis/commit/e3167d25c92cb9c852cdd5100de61c30f62ce9d5)] __-__ __docs(architecture docs)__ : update docs content (lxd1190)
- [[```86569e65```](https://github.com/Tencent/Metis/commit/86569e65bc4f5717fdd35c7511347f6e129f109d)] __-__ __docs(code_framework)__ : update arch picture and code description (lxd1190)
- [[```c739f92c```](https://github.com/Tencent/Metis/commit/c739f92ca6def3e37c75641c0bf22e41eb4e3c11)] __-__ __docs(install)__ : update db config path (lxd1190)
@@ -12,5 +12,5 @@ docker rm -f metis-web

docker run --net=host --name=metis-db -d -p 3306:3306 -v /data/metis/mysql/:/var/lib/mysql:Z -e MYSQL_ROOT_PASSWORD=metis@123 zhiyunmetis/metis-db
sleep 6
docker run --net=host --name=metis-svr -d -p 8080:8080 -v /data/metis/module/:/metis/app/model/time_series_detector:Z zhiyunmetis/metis-svr /bin/sh /metis/init.sh
docker run --net=host --name=metis-web -d -p 80:80 zhiyunmetis/metis-web /bin/sh /metis/init.sh ${ip}
docker run --net=host --name=metis-svr -d -p 8080:8080 -v /data/metis/model/:/metis/time_series_detector/model:Z zhiyunmetis/metis-svr /bin/sh /metis/init.sh
docker run --net=host --name=metis-web -d -p 80:80 zhiyunmetis/metis-web /bin/sh /metis/init.sh ${ip}
@@ -1,20 +1,22 @@

# API Documentation
## Time Series Anomaly Detection Learnware API
## Time Series Anomaly Detection API

Users can run anomaly detection on time series through the API; the results can then be viewed and managed in the WEB console. The server provides two anomaly detection APIs for different scenarios:
Users can choose the appropriate API for time series anomaly detection according to their scenario:

1. Value detection: suitable for most KPI metric data; uses joint unsupervised and supervised detection and loads a detection model

2. Rate detection: suitable for normally distributed data, detected with unsupervised algorithms; e.g. life-or-death metrics such as success rate

- For API requests, use the address of the backend service you deployed
- For HTTP API calls, use the address of the backend service you deployed; the Python API can be called directly
- The current detection window is 3 hours with one data point per minute, i.e. a window value of 180
- The dates and spans of the same-period comparison data can be adjusted as needed; in this document the two comparison series are taken from the same period yesterday and one week ago

Detecting the current value depends on three past segments of data; see the sample figure for the selection rule:


### 1. Value detection
### I. HTTP API

#### 1. Value detection

* API: POST /{ip}:{port}/PredictValue
* Description: detect whether the latest data point is anomalous based on the reference data
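For orientation, a hedged sketch of calling this endpoint with Python `requests`; the host, port, field values, and body encoding are assumptions, so check them against the parameter tables below:

```
# Python (illustrative sketch; values are placeholders)
import json
import requests

window = 180
payload = {
    "viewId": "2002", "viewName": "login", "attrId": "19201", "attrName": "logins",
    "time": "2018-10-17 20:20:00", "window": window,
    "dataC": ",".join(["100"] * (2 * window + 1)),  # one week ago, +/- window
    "dataB": ",".join(["100"] * (2 * window + 1)),  # yesterday, +/- window
    "dataA": ",".join(["100"] * (window + 1)),      # history plus the point to test
}
resp = requests.post("http://127.0.0.1:8080/PredictValue", data=json.dumps(payload))
print(resp.json())  # expected shape: {"code": ..., "msg": ..., "data": {"ret": ..., "p": ...}}
```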
@@ -73,7 +75,7 @@

| ret | int | Whether the result is anomalous. 0: anomalous; 1: normal |
| p | string | Probability value; the smaller it is, the higher the confidence of an anomaly; currently p < 0.15 is judged anomalous |

### 2. Rate detection
#### 2. Rate detection

* API: POST /{ip}:{port}/PredictRate
* Description: detect whether the latest data point is anomalous based on the reference data
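Analogously, a hedged sketch for the rate endpoint (placeholders again; rate series are floats such as success ratios):

```
# Python (illustrative sketch; values are placeholders)
import json
import requests

payload = {
    "viewId": "2002", "viewName": "order", "attrId": "19202", "attrName": "success_rate",
    "time": "2018-10-17 20:20:00",
    "dataC": ",".join(["0.99"] * 361),
    "dataB": ",".join(["0.99"] * 361),
    "dataA": ",".join(["0.99"] * 180 + ["0.52"]),  # sudden drop in the latest point
}
resp = requests.post("http://127.0.0.1:8080/PredictRate", data=json.dumps(payload))
print(resp.json())
```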
@@ -129,4 +131,404 @@

| code | int | Return code. 0: success; non-zero: failure |
| msg | string | Return message |
| ret | int | Whether the result is anomalous. 0: anomalous; 1: normal |
| p | string | Probability value; the smaller it is, the higher the confidence of an anomaly |

### II. Python API

The time_series_detector directory under the Metis project is the time series anomaly detection learnware; it can be called directly from Python code.

#### 1. Value detection
* Description: detect whether the latest data point is anomalous based on the reference data

* How to call:

```
# Python
from time_series_detector import detect

detect_obj = detect.Detect()
detect_obj.value_predict(data)
```

* Input parameter: a Python dict

```
{
    "window": 180,
    "dataC": "9,10,152,...,255,...,16",
    "dataB": "9,10,152,...,255,...,18",
    "dataA": "9,10,152,...,458"
}
```

* Input parameter description:

| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| taskId | string | no | none | Detection model to use; if omitted, the system default model is used |
| window | int | no | none | Window value; currently 180 is supported |
| dataC | string | yes | none | The point at the same moment one week before the point under detection, plus 180 points on each side; 361 points joined in time order, comma-separated |
| dataB | string | yes | none | The point at the same moment yesterday, plus 180 points on each side; 361 points joined in time order, comma-separated |
| dataA | string | yes | none | The point under detection plus the preceding 180 points; 181 points joined in time order, comma-separated |

* Return values:

```
code, {
    "ret": 0,
    "p": "0.05",
}
```

* Return value description:

| Name | Type | Description |
|---|---|---|
| code | int | Return code. 0: success; non-zero: failure |
| ret | int | Whether the result is anomalous. 0: anomalous; 1: normal |
| p | string | Probability value; the smaller it is, the higher the confidence of an anomaly; currently p < 0.15 is judged anomalous |

* Example:


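To make the length rules in the input-parameter table concrete, a hedged sketch that builds inputs of the required sizes (361/361/181 for window = 180) from made-up values and calls the learnware directly:

```
# Python (illustrative sketch; series values are made up)
from time_series_detector import detect

window = 180
data = {
    "window": window,
    "dataC": ",".join(["10"] * (2 * window + 1)),   # 361 points
    "dataB": ",".join(["10"] * (2 * window + 1)),   # 361 points
    "dataA": ",".join(["10"] * window + ["500"]),   # 181 points, spike at the end
}
ret_code, ret_data = detect.Detect().value_predict(data)
# ret_data is expected to look like {"ret": 0 or 1, "p": "..."}
```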
#### 2. Rate detection
* Description: detect whether the latest data point is anomalous based on the reference data

* How to call:

```
# Python
from time_series_detector import detect

detect_obj = detect.Detect()
detect_obj.rate_predict(data)
```

* Input parameter: a Python dict

```
{
    "dataC": "9,10,152,...,255,...,16",
    "dataB": "9,10,152,...,255,...,18",
    "dataA": "9,10,152,...,458"
}
```

* Input parameter description:

| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| dataC | string | yes | none | The point at the same moment one week before the point under detection, plus 180 points on each side; 361 points joined in time order, comma-separated |
| dataB | string | yes | none | The point at the same moment yesterday, plus 180 points on each side; 361 points joined in time order, comma-separated |
| dataA | string | yes | none | The point under detection plus the preceding 180 points; 181 points joined in time order, comma-separated |

* Return values:

```
code, {
    "ret": 0,
    "p": "0",
}
```

* Return value description:

| Name | Type | Description |
|---|---|---|
| code | int | Return code. 0: success; non-zero: failure |
| ret | int | Whether the result is anomalous. 0: anomalous; 1: normal |
| p | string | Probability value; the smaller it is, the higher the confidence of an anomaly; currently p < 0.15 is judged anomalous |

* Example:


### III. LIB Library
The time_series_detector/lib directory under the Metis project holds the learnware dynamic libraries; the library files can be loaded and called from code.

libdetect.so is currently supported on CentOS 7.2+.


#### Calling from Python:

##### 1. Value detection
* Description: detect whether the latest data point is anomalous based on the reference data

* How to call:

Load the .so library:

```
# Python
from ctypes import cdll

so = cdll.LoadLibrary
metis_lib = so("./libdetect.so")
handle = metis_lib.load_model("./xgb_default_model")
```
Construct the input data:

```
# Python
from ctypes import *

class ValueData(Structure):
    _fields_ = [('data_a', POINTER(c_int)), ('data_b', POINTER(c_int)), ('data_c', POINTER(c_int)),
                ('len_a', c_int), ('len_b', c_int), ('len_c', c_int)]

# test data
data_c = [1] * 361
data_b = [1] * 361
data_a = [1] * 180
data_a.append(10)

paarray = (c_int * len(data_a))(*data_a)
pbarray = (c_int * len(data_b))(*data_b)
pcarray = (c_int * len(data_c))(*data_c)
data_value = ValueData(paarray, pbarray, pcarray, len(data_a), len(data_b), len(data_c))
```

Call the prediction function:

```
# Python
result = c_int()
prob = c_float()
ret_code = metis_lib.value_predict(handle, byref(data_value), byref(result), byref(prob))
if ret_code != 0:
    print "value_predict error code = %d" % ret_code
print result, prob
```
* Input parameter: C struct

```
typedef struct {
    int* data_a;
    int* data_b;
    int* data_c;
    int len_a;
    int len_b;
    int len_c;
} ValueData;
```
* Input parameter description:

| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| handle | int | yes | none | Model handle returned by load_model |
| data_value | ValueData | yes | none | Data to detect |

* Return values:
```
ret_code
result
prob
```

* Return value description:

| Name | Type | Description |
|---|---|---|
| ret_code | int | Return code. 0: success; non-zero: failure |
| result | c_int | Whether the result is anomalous. 0: anomalous; 1: normal |
| prob | c_float | Probability value; the smaller it is, the higher the confidence of an anomaly; currently prob < 0.15 is judged anomalous |
##### 2. Rate detection
* Description: detect whether the latest data point is anomalous based on the reference data

* How to call:

Load the .so library:

```
# Python
from ctypes import cdll

so = cdll.LoadLibrary
metis_lib = so("./libdetect.so")
```

Construct the input data:

```
# Python
from ctypes import *

class RateData(Structure):
    _fields_ = [('data_a', POINTER(c_double)), ('data_b', POINTER(c_double)), ('data_c', POINTER(c_double)),
                ('len_a', c_int), ('len_b', c_int), ('len_c', c_int)]

# test data
data_c = [1.0] * 361
data_b = [1.0] * 361
data_a = [1.0] * 180
data_a.append(0.9)

paarray = (c_double * len(data_a))(*data_a)
pbarray = (c_double * len(data_b))(*data_b)
pcarray = (c_double * len(data_c))(*data_c)
data_value = RateData(paarray, pbarray, pcarray, len(data_a), len(data_b), len(data_c))
```

Call the prediction function:

```
# Python
result = c_int()
prob = c_float()
ret_code = metis_lib.rate_predict(byref(data_value), byref(result), byref(prob))
if ret_code != 0:
    print "rate_predict error code = %d" % ret_code
print result, prob
```
* Input parameter: C struct

```
typedef struct {
    double* data_a;
    double* data_b;
    double* data_c;
    int len_a;
    int len_b;
    int len_c;
} RateData;
```
* Input parameter description:

| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| data_value | RateData | yes | none | Data to detect |

* Return values:
```
ret_code
result
prob
```

* Return value description:

| Name | Type | Description |
|---|---|---|
| ret_code | int | Return code. 0: success; non-zero: failure |
| result | c_int | Whether the result is anomalous. 0: anomalous; 1: normal |
| prob | c_float | Probability value; the smaller it is, the higher the confidence of an anomaly |
#### Calling from C:

To call the detection functions from C, include the header file detect.h and link against libdetect.so at build time.
##### 1. Value detection
* Description: detect whether the latest data point is anomalous based on the reference data

* How to call:

Call load_model to load the model, then call value_predict to predict:


```
#include "detect.h"

if (NULL == (handle = load_model("./xgb_default_model")))
{
    printf("load model error\n");
    return 0;
}
int ret = value_predict(handle, &value_data, &sample_result, &prob);
printf("ret=%d result = %d prob = %f\n", ret, sample_result, prob);
```

* Input parameter: C struct

```
typedef struct {
    int* data_a;
    int* data_b;
    int* data_c;
    int len_a;
    int len_b;
    int len_c;
} ValueData;
```
* Input parameter description:

| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| handle | int | yes | none | Model handle returned by load_model |
| value_data | ValueData | yes | none | Data to detect |

* Return values:
```
ret
sample_result
prob
```

* Return value description:

| Name | Type | Description |
|---|---|---|
| ret | int | Return code. 0: success; non-zero: failure |
| sample_result | c_int | Whether the result is anomalous. 0: anomalous; 1: normal |
| prob | c_float | Probability value; the smaller it is, the higher the confidence of an anomaly; currently prob < 0.15 is judged anomalous |

##### 2. Rate detection
* Description: detect whether the latest data point is anomalous based on the reference data

* How to call:

```
#include "detect.h"
float prob;
int sample_result;
int ret = rate_predict(&rate_data, &sample_result, &prob);
printf("ret=%d result =%d prob = %f \n", ret, sample_result, prob);
```

* Input parameter: C struct

```
typedef struct {
    double* data_a;
    double* data_b;
    double* data_c;
    int len_a;
    int len_b;
    int len_c;
} RateData;
```
* Input parameter description:

| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| rate_data | RateData | yes | none | Data to detect |

* Return values:
```
ret
sample_result
prob
```

* Return value description:

| Name | Type | Description |
|---|---|---|
| ret | int | Return code. 0: success; non-zero: failure |
| result | c_int | Whether the result is anomalous. 0: anomalous; 1: normal |
| prob | c_float | Probability value; the smaller it is, the higher the confidence of an anomaly |
@@ -1,21 +1,25 @@

## Architecture of the Time Series Anomaly Detection Learnware



The overall layered design of the time series anomaly detection learnware can be divided into the following four layers:
The overall layering of the time series anomaly detection project can be divided into the following five layers:

1. **Data layer (DB)**: stores detected anomaly information, sample information, task information, and so on

2. **Service layer (server)**: the service layer is divided into four modules
2. **Service layer (SERVICE)**: the service layer is divided into two modules

    1. **Data access module (DAO)**: wraps the common data access interfaces to the DB layer.

    2. **Feature module (feature)**: provides three kinds of time series features (statistical, fitting, and classification features) for feature extraction, used in supervised learning and training.
    2. **Business module (service)**: implements the concrete business logic behind the API layer.

    3. **Algorithm module (feature)**: wraps several common machine learning algorithms (statistical discrimination, exponential moving average, polynomial fitting, GBDT, xgboost, etc.) for joint-arbitration detection of time series data.
3. **Learnware layer (LEARNWARE)**: the learnware layer is divided into three modules

    1. **Detection module (detect)**: provides the time series anomaly detection interface

    4. **Business module (business)**: built on top of the atomic interfaces; implements the concrete business logic behind the API layer.
    2. **Feature module (features)**: provides three kinds of time series features (statistical, fitting, and classification features) for feature extraction, used in supervised learning and training.

3. **API layer (api)**: exposes the API capabilities: the time series anomaly detection interface and the WEB management interface.
    3. **Algorithm module (algorithm)**: wraps several common machine learning algorithms (statistical discrimination, exponential moving average, polynomial fitting, GBDT, xgboost, etc.) for joint-arbitration detection of time series data.

4. **API layer (API)**: exposes the API capabilities: the time series anomaly detection interface and the WEB management interface.

4. **WEB layer (web)**: the system's WEB service; through its interface users can query anomalies, label samples, manage the sample set, train models, and so on.
5. **WEB layer (WEB)**: the system's WEB service; through its interface users can query anomalies, label samples, manage the sample set, train models, and so on.
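As a rough illustration of this layering (module names per the diagram above; the wiring is an assumption, not actual project code), a detection request travels roughly like this:

```
# Python (conceptual sketch)
from app.service.time_series_detector import detect_service  # SERVICE layer

request_data = {"dataC": "...", "dataB": "...", "dataA": "..."}  # built by the API layer
svc = detect_service.DetectService()        # delegates to the LEARNWARE layer internally
response = svc.value_predict(request_data)  # LEARNWARE: time_series_detector.detect.Detect
# The API layer returns `response` as JSON; anomalies are persisted through the DAO
# module into the DB layer, where the WEB layer queries and labels them.
```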
@@ -1,66 +1,67 @@

## Project Directory Structure

The development directory structure is kept consistent, easy to understand, and convenient to manage.

## Directory Layout

- `/app/` main server working directory
- `/app/` server working directory

    `/app/controller/` routing entry (Action layer)

    `/app/config/` business configuration layer
    `/app/common/` shared functions and constant definitions

    `/app/dao/` database table instance layer

    `/app/model/` model file directory
    `/app/service/` business logic layer

    `/app/service/` business logic layer
- `/uweb/` management console working directory

    `/app/service/algorithm/` algorithm layer
    `/uweb/custom/` static files needed by the WEB console

    `/app/service/feature/` feature layer
    `/uweb/lib/` WEB framework directory

    `/app/utils/` shared functions
    `/uweb/src/` WEB development directory

- `/uweb/` main management console working directory
    `/uweb/src/pages/` directory of all WEB pages

    `/uweb/custom/` static files needed by the WEB console
    `/uweb/src/plugins/` custom WEB plugin directory

    `/uweb/lib/` WEB framework directory
    `/uweb/src/app.json` WEB configuration file

    `/uweb/src/` WEB development directory
    `/uweb/src/app.less` global WEB stylesheet

    `/uweb/src/pages/` directory of all WEB pages
    `/uweb/dist/` packaged static files of the WEB console

    `/uweb/src/plugins/` custom WEB plugin directory
- `/time_series_detector/` time series anomaly detection learnware directory

    `/uweb/src/app.json` WEB configuration file
    `/time_series_detector/model/` model file directory

    `/time_series_detector/algorithm/` algorithm layer

    `/uweb/src/app.less` global WEB stylesheet
    `/time_series_detector/feature/` feature layer

The project supports the following file types:
1. `.json`: configuration files
2. `.uwx`: UWEB view files
3. `.uw`: UWEB logic scripts
4. `.js`: plain JavaScript logic scripts
5. `.ts`: plain TypeScript logic scripts
6. `.less`: Less stylesheets
7. `.css`: CSS stylesheets
8. `.jsx`: JavaScript React scripts usable when developing custom plugins
9. `.tsx`: TypeScript React scripts usable when developing custom plugins
10. `.png`, `.jpg`, `.gif`, `.svg`: image files

    `/uweb/dist/` packaged static files of the WEB console

The project supports the following file types:
1. `.json`: configuration files
2. `.uwx`: UWEB view files
3. `.uw`: UWEB logic scripts
4. `.js`: plain JavaScript logic scripts
5. `.ts`: plain TypeScript logic scripts
6. `.less`: Less stylesheets
7. `.css`: CSS stylesheets
8. `.jsx`: JavaScript React scripts usable when developing custom plugins
9. `.tsx`: TypeScript React scripts usable when developing custom plugins
10. `.png`, `.jpg`, `.gif`, `.svg`: image files

- `/docs/` project documentation directory

## Call Relationships

`/app/controller/` is the server-side routing entry and may call the service business layer
`uweb` is the management console working directory and may call the server APIs

`/app/service/` is the service business layer and may call the private dao database layer
`/app/controller/` is the server-side routing entry and may call the service business layer

`/app/model/` holds model files, loaded by the service business layer
`/app/service/` is the service business layer and may call the private dao database layer and the time_series_detector learnware interface

`/time_series_detector/` is the learnware directory, called by the service business layer

`/app/utils/` is the shared function layer, callable globally

Binary file not shown.
(Before: 48 KiB, After: 70 KiB)
Binary file not shown.
(After: 6.3 KiB)
Binary file not shown.
(After: 6.9 KiB)
@@ -68,15 +68,15 @@ mysql -umetis -pmetis@123 -h127.0.0.1 metis < /data/Metis/app/sql/time_series_de

4. Update the database configuration in the server config file `database.py`:
```
vim /data/Metis/app/config/database.py
vim /data/Metis/app/dao/db_common/database.py
```
Edit the configuration:
```
db = 'metis'
user = 'metis'
passwd = 'metis@123'
host = '127.0.0.1'
port = 3306
DB = 'metis'
USER = 'metis'
PASSWD = 'metis@123'
HOST = '127.0.0.1'
PORT = 3306
```

## 1.3. <a id="chapter-1-3"></a>Server Environment Setup
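For orientation, the DAO layer presumably consumes these constants roughly as follows (a hedged sketch; the real connection code lives in the dao modules):

```
# Python (illustrative sketch)
import MySQLdb
from app.dao.db_common import database

conn = MySQLdb.connect(host=database.HOST, port=database.PORT, user=database.USER,
                       passwd=database.PASSWD, db=database.DB, charset="utf8")
```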
@@ -111,7 +111,7 @@ export PYTHONPATH=/data/Metis:$PYTHONPATH

### 1.3.4. Start the server

Start the server program
Start the server program; replace {ip} with the server's real IP address

```
python /data/Metis/app/controller/manage.py runserver {ip}:{port}
```
@@ -137,7 +137,7 @@ python /data/Metis/app/controller/manage.py runserver {ip}:{port}

Run npm run build

Copy the custom folder under the uweb directory into the dist folder generated under the uweb directory

Point root in the nginx configuration to the dist folder under the uweb directory
|
|||
|
||||
npm run build 项目代码开发完成后,执行该命令打包项目代码。在项目根目录会生成一个 dist 目录,然后复制custom目录,放至dist目录下。发布时,将 dist 目录中的全部文件作为静态文件,放至服务器指定的静态文件目录即可
|
||||
|
||||
安装完成后,请参考API使用说明进行API调用
|
||||
|
||||
# 2. <a id="chapter-5"></a>docker安装部署
|
||||
|
||||
## 2.1. 安装docker
|
||||
|
|
@@ -195,4 +197,7 @@ docker ps

Check the startup status of the three containers (metis-db, metis-web, metis-svr); if they all started normally, the installation succeeded.


If the installation succeeded, you can access `http://${IP}` directly in a browser
Note: Metis depends on ports 80 and 8080. Tencent Cloud servers open port 80 by default but do not allow external access to 8080; you need to manually allow port 8080 in the security group.

Refer to the API documentation to make API calls
@@ -0,0 +1 @@

__all__ = ["fixtures", "test_feature"]
@@ -9,8 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed

"""

from tests.fixtures import DataTestCase
from app.service.time_series_detector.feature.statistical_features import *

from time_series_detector.feature.statistical_features import *

class FeatureTestCase(DataTestCase):
@@ -0,0 +1 @@

__all__ = ["algorithm", "feature", "common", "detect"]
@@ -8,9 +8,9 @@ https://opensource.org/licenses/BSD-3-Clause

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""

from app.service.time_series_detector.algorithm import ewma
from app.service.time_series_detector.algorithm import polynomial_interpolation
from app.config.common import *
from time_series_detector.algorithm import ewma
from time_series_detector.algorithm import polynomial_interpolation
from time_series_detector.common.tsd_common import *


class EwmaAndPolynomialInterpolation(object):
@@ -13,13 +13,12 @@ import pickle

import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.externals import joblib
from app.service.time_series_detector.feature import feature_service
from app.utils.utils import *
from app.config.errorcode import *
from app.config.common import *
from time_series_detector.feature import feature_service
from time_series_detector.common.tsd_common import *
from time_series_detector.common.tsd_errorcode import *


MODEL_PATH = os.path.join(os.path.dirname(__file__), '../../../model/time_series_detector/')
MODEL_PATH = os.path.join(os.path.dirname(__file__), '../model/')
DEFAULT_MODEL = MODEL_PATH + "gbdt_default_model"
@@ -73,7 +72,7 @@ class Gbdt(object):

        y_train = []
        features = self.__calculate_features(data, window)
        if not features:
            return LACK_SAMPLE
            return TSD_LACK_SAMPLE
        for index in features:
            X_train.append(index[0])
            y_train.append(index[1])
@@ -85,8 +84,8 @@ class Gbdt(object):

            model_name = MODEL_PATH + task_id + "_model"
            joblib.dump(grd, model_name)
        except Exception as ex:
            return TRAIN_ERR, str(ex)
        return OP_SUCCESS, ""
            return TSD_TRAIN_ERR, str(ex)
        return TSD_OP_SUCCESS, ""

    def predict(self, X, window=DEFAULT_WINDOW, model_name=DEFAULT_MODEL):
        """
@@ -9,7 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed

"""

from sklearn.ensemble import IsolationForest
from app.config.common import *
from time_series_detector.common.tsd_common import *


class IForest(object):
@@ -12,7 +12,7 @@ import numpy as np

from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from app.config.common import *
from time_series_detector.common.tsd_common import *


class PolynomialInterpolation(object):
@@ -10,11 +10,10 @@ Unless required by applicable law or agreed to in writing, software distributed

import os
import xgboost as xgb
from app.service.time_series_detector.feature import feature_service
from app.utils.utils import *
from app.config.errorcode import *
from app.config.common import *
MODEL_PATH = os.path.join(os.path.dirname(__file__), '../../../model/time_series_detector/')
from time_series_detector.feature import feature_service
from time_series_detector.common.tsd_errorcode import *
from time_series_detector.common.tsd_common import *
MODEL_PATH = os.path.join(os.path.dirname(__file__), '../model/')
DEFAULT_MODEL = MODEL_PATH + "xgb_default_model"
@@ -76,7 +75,7 @@ class XGBoosting(object):

        try:
            f = open(feature_file_name, "w")
        except Exception as ex:
            return CAL_FEATURE_ERR, str(ex)
            return TSD_CAL_FEATURE_ERR, str(ex)
        times = 0
        for temp in data:
            if times > 0:
@@ -86,7 +85,7 @@ class XGBoosting(object):

            for x in result:
                f.write(' ' + x)
            times = times + 1
        return OP_SUCCESS, ""
        return TSD_OP_SUCCESS, ""

    def __calculate_features(self, data, feature_file_name, window=DEFAULT_WINDOW):
        """
@@ -106,7 +105,7 @@ class XGBoosting(object):

        try:
            ret_code, ret_data = self.__save_libsvm_format(features, feature_file_name)
        except Exception as ex:
            ret_code = CAL_FEATURE_ERR
            ret_code = TSD_CAL_FEATURE_ERR
            ret_data = str(ex)
        return ret_code, ret_data
@@ -121,12 +120,12 @@ class XGBoosting(object):

        model_name = MODEL_PATH + task_id + "_model"
        feature_file_name = MODEL_PATH + task_id + "_features"
        ret_code, ret_data = self.__calculate_features(data, feature_file_name)
        if ret_code != OP_SUCCESS:
        if ret_code != TSD_OP_SUCCESS:
            return ret_code, ret_data
        try:
            dtrain = xgb.DMatrix(feature_file_name)
        except Exception as ex:
            return READ_FEATURE_FAILED, str(ex)
            return TSD_READ_FEATURE_FAILED, str(ex)
        params = {
            'max_depth': self.max_depth,
            'eta': self.eta,
@@ -143,8 +142,8 @@ class XGBoosting(object):

            bst = xgb.train(params, dtrain, num_round)
            bst.save_model(model_name)
        except Exception as ex:
            return TRAIN_ERR, str(ex)
        return OP_SUCCESS, ""
            return TSD_TRAIN_ERR, str(ex)
        return TSD_OP_SUCCESS, ""

    def predict(self, X, window=DEFAULT_WINDOW, model_name=DEFAULT_MODEL):
        """
@@ -0,0 +1 @@

__all__ = ["tsd_common", "tsd_errorcode"]
@@ -1,99 +1,97 @@

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""

import numpy as np
import traceback
from functools import wraps
from app.config.errorcode import *
from app.config.common import *


def is_standard_time_series(time_series, window=DEFAULT_WINDOW):
    """
    Check the length of time_series. If window = 180, then the length of time_series should be 903.
    The mean value of the last window should be larger than 0.

    :param time_series: the time series to check, like [data_c, data_b, data_a]
    :type time_series: pandas.Series
    :param window: the length of window
    :return: True or False
    :return type: boolean
    """
    return bool(len(time_series) == 5 * window + 3 and np.mean(time_series[(4 * window + 2):]) > 0)


def split_time_series(time_series, window=DEFAULT_WINDOW):
    """
    Split the time_series into five parts. Each has a length of window + 1

    :param time_series: [data_c, data_b, data_a]
    :param window: the length of window
    :return: split list [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]]
    """
    data_c_left = time_series[0:(window + 1)]
    data_c_right = time_series[window:(2 * window + 1)]
    data_b_left = time_series[(2 * window + 1):(3 * window + 2)]
    data_b_right = time_series[(3 * window + 1):(4 * window + 2)]
    data_a = time_series[(4 * window + 2):]
    split_time_series = [
        data_c_left,
        data_c_right,
        data_b_left,
        data_b_right,
        data_a
    ]
    return split_time_series


def normalize_time_series(split_time_series):
    """
    Normalize the split_time_series.

    :param split_time_series: [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]]
    :return: all list / mean(split_time_series)
    """
    value = np.mean(split_time_series[4])
    if value > 1:
        normalized_data_c_left = list(split_time_series[0] / value)
        normalized_data_c_right = list(split_time_series[1] / value)
        normalized_data_b_left = list(split_time_series[2] / value)
        normalized_data_b_right = list(split_time_series[3] / value)
        normalized_data_a = list(split_time_series[4] / value)
    else:
        normalized_data_c_left = split_time_series[0]
        normalized_data_c_right = split_time_series[1]
        normalized_data_b_left = split_time_series[2]
        normalized_data_b_right = split_time_series[3]
        normalized_data_a = split_time_series[4]
    normalized_split_time_series = [
        normalized_data_c_left,
        normalized_data_c_right,
        normalized_data_b_left,
        normalized_data_b_right,
        normalized_data_a
    ]
    return normalized_split_time_series


def build_ret_data(ret_code, data=""):
    return {"code": ret_code, "msg": ERR_CODE[ret_code], "data": data}


def exce_service(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            ret_code, ret_data = func(*args, **kwargs)
            return_dict = build_ret_data(ret_code, ret_data)
        except Exception as ex:
            traceback.print_exc()
            return_dict = build_ret_data(THROW_EXP, str(ex))
        return return_dict
    return wrapper

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""

import numpy as np

DEFAULT_WINDOW = 180


def is_standard_time_series(time_series, window=DEFAULT_WINDOW):
    """
    Check the length of time_series. If window = 180, then the length of time_series should be 903.
    The mean value of the last window should be larger than 0.

    :param time_series: the time series to check, like [data_c, data_b, data_a]
    :type time_series: pandas.Series
    :param window: the length of window
    :return: True or False
    :return type: boolean
    """
    return bool(len(time_series) == 5 * window + 3 and np.mean(time_series[(4 * window + 2):]) > 0)


def split_time_series(time_series, window=DEFAULT_WINDOW):
    """
    Split the time_series into five parts. Each has a length of window + 1

    :param time_series: [data_c, data_b, data_a]
    :param window: the length of window
    :return: split list [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]]
    """
    data_c_left = time_series[0:(window + 1)]
    data_c_right = time_series[window:(2 * window + 1)]
    data_b_left = time_series[(2 * window + 1):(3 * window + 2)]
    data_b_right = time_series[(3 * window + 1):(4 * window + 2)]
    data_a = time_series[(4 * window + 2):]
    split_time_series = [
        data_c_left,
        data_c_right,
        data_b_left,
        data_b_right,
        data_a
    ]
    return split_time_series


def normalize_time_series(split_time_series):
    """
    Normalize the split_time_series.

    :param split_time_series: [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]]
    :return: all list / mean(split_time_series)
    """
    value = np.mean(split_time_series[4])
    if value > 1:
        normalized_data_c_left = list(split_time_series[0] / value)
        normalized_data_c_right = list(split_time_series[1] / value)
        normalized_data_b_left = list(split_time_series[2] / value)
        normalized_data_b_right = list(split_time_series[3] / value)
        normalized_data_a = list(split_time_series[4] / value)
    else:
        normalized_data_c_left = split_time_series[0]
        normalized_data_c_right = split_time_series[1]
        normalized_data_b_left = split_time_series[2]
        normalized_data_b_right = split_time_series[3]
        normalized_data_a = split_time_series[4]
    normalized_split_time_series = [
        normalized_data_c_left,
        normalized_data_c_right,
        normalized_data_b_left,
        normalized_data_b_right,
        normalized_data_a
    ]
    return normalized_split_time_series


def normalize_time_series_by_max_min(split_time_series):
    """
    Normalize the split_time_series by max_min_normalization.

    :param split_time_series: [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]]
    :return: max_min_normalized time_series
    """
    time_series = split_time_series[0] + split_time_series[1][1:] + split_time_series[2] + split_time_series[3][1:] + split_time_series[4]
    max_value = np.max(time_series)
    min_value = np.min(time_series)
    normalized_time_series = [0.0] * len(time_series)
    if max_value - min_value > 0:
        normalized_time_series = list((np.array(time_series) - min_value) / float(max_value - min_value))

    return normalized_time_series
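To make the index arithmetic above concrete, a hedged sketch (values made up) of feeding a standard 903-point series through these helpers:

```
# Python (illustrative sketch)
import numpy as np
from time_series_detector.common.tsd_common import (
    DEFAULT_WINDOW, is_standard_time_series, split_time_series, normalize_time_series)

window = DEFAULT_WINDOW                    # 180
series = np.ones(5 * window + 3)           # 903 points: dataC + dataB + dataA
assert is_standard_time_series(series, window)

parts = split_time_series(series, window)  # five chunks of window + 1 points each
normalized = normalize_time_series(parts)  # scaled by mean(data_a) when it exceeds 1
```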
@ -0,0 +1,29 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""


TSD_OP_SUCCESS = 0
TSD_THROW_EXP = 1000
TSD_CHECK_PARAM_FAILED = 1002
TSD_FILE_FORMAT_ERR = 1003
TSD_CAL_FEATURE_ERR = 2001
TSD_READ_FEATURE_FAILED = 2002
TSD_TRAIN_ERR = 2003
TSD_LACK_SAMPLE = 2004

ERR_CODE = {
    TSD_OP_SUCCESS: "操作成功",  # operation succeeded
    TSD_THROW_EXP: "抛出异常",  # exception thrown
    TSD_CHECK_PARAM_FAILED: "参数检查失败",  # parameter check failed
    TSD_FILE_FORMAT_ERR: "文件格式有误",  # invalid file format
    TSD_CAL_FEATURE_ERR: "特征计算出错",  # feature calculation error
    TSD_READ_FEATURE_FAILED: "读取特征数据失败",  # failed to read feature data
    TSD_TRAIN_ERR: "训练出错",  # training error
    TSD_LACK_SAMPLE: "缺少正样本或负样本"  # missing positive or negative samples
}

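A minimal lookup sketch for these codes (hypothetical caller, not part of the diff):

    from time_series_detector.common.tsd_errorcode import ERR_CODE, TSD_LACK_SAMPLE

    def describe(ret_code):
        # fall back to the raw code when it is not in the table
        return ERR_CODE.get(ret_code, "unknown error code: %s" % ret_code)

    print(describe(TSD_LACK_SAMPLE))  # -> "缺少正样本或负样本" (missing positive or negative samples)
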
@ -0,0 +1,124 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""

import os
from time_series_detector.algorithm import isolation_forest, ewma, polynomial_interpolation, statistic, xgboosting
from time_series_detector.common.tsd_errorcode import *
from time_series_detector.common.tsd_common import *

MODEL_PATH = os.path.join(os.path.dirname(__file__), './model/')


class Detect(object):

    def __init__(self):
        self.iforest_obj = isolation_forest.IForest()
        self.ewma_obj = ewma.Ewma()
        self.polynomial_obj = polynomial_interpolation.PolynomialInterpolation()
        self.statistic_obj = statistic.Statistic()
        self.supervised_obj = xgboosting.XGBoosting()

    def __list_is_digit(self, data):
        for index in data:
            try:
                float(index)
            except ValueError:
                return False
        return True

    def __check_param(self, data):
        if ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()):
            return TSD_CHECK_PARAM_FAILED, "missing parameter"
        if not data['dataA']:
            return TSD_CHECK_PARAM_FAILED, "dataA can not be empty"
        if not data['dataB']:
            return TSD_CHECK_PARAM_FAILED, "dataB can not be empty"
        if not data['dataC']:
            return TSD_CHECK_PARAM_FAILED, "dataC can not be empty"
        if not self.__list_is_digit(data['dataA'].split(',')):
            return TSD_CHECK_PARAM_FAILED, "dataA contains illegal numbers"
        if not self.__list_is_digit(data['dataB'].split(',')):
            return TSD_CHECK_PARAM_FAILED, "dataB contains illegal numbers"
        if not self.__list_is_digit(data['dataC'].split(',')):
            return TSD_CHECK_PARAM_FAILED, "dataC contains illegal numbers"
        if "window" in data:
            window = data["window"]
        else:
            window = DEFAULT_WINDOW
        if len(data['dataC'].split(',')) != (2 * window + 1):
            return TSD_CHECK_PARAM_FAILED, "dataC length does not match"
        if len(data['dataB'].split(',')) != (2 * window + 1):
            return TSD_CHECK_PARAM_FAILED, "dataB length does not match"
        if len(data['dataA'].split(',')) != (window + 1):
            return TSD_CHECK_PARAM_FAILED, "dataA length does not match"
        return TSD_OP_SUCCESS, ""

    def value_predict(self, data):
        """
        Predict whether the latest value is an outlier.

        :param data: The attributes are:
                     'window', the length of the window,
                     'taskId', the id of the detect model,
                     'dataC', a piece of data to learn,
                     'dataB', a piece of data to learn,
                     'dataA', a piece of data to learn, ending with the latest value to be detected.
        :type data: Dictionary-like object
        :return: The attributes are:
                 'p', the class probability,
                 'ret', the detect result (1 denotes normal, 0 denotes abnormal).
        """
        ret_code, ret_data = self.__check_param(data)
        if ret_code != TSD_OP_SUCCESS:
            return ret_code, ret_data
        if "taskId" in data and data["taskId"]:
            model_name = MODEL_PATH + data["taskId"] + "_model"
        else:
            model_name = MODEL_PATH + "xgb_default_model"
        combined_data = data["dataC"] + "," + data["dataB"] + "," + data["dataA"]
        # list() keeps the series sliceable under Python 3, where map() is lazy
        time_series = list(map(int, combined_data.split(',')))
        if "window" in data:
            window = data["window"]
        else:
            window = DEFAULT_WINDOW
        statistic_result = self.statistic_obj.predict(time_series)
        ewma_result = self.ewma_obj.predict(time_series)
        polynomial_result = self.polynomial_obj.predict(time_series, window)
        # the cheap unsupervised detectors act as a filter; only values they
        # flag as abnormal (0) are passed on to the supervised xgboost model
        if statistic_result == 0 or ewma_result == 0 or polynomial_result == 0:
            xgb_result = self.supervised_obj.predict(time_series, window, model_name)
            res_value = xgb_result[0]
            prob = xgb_result[1]
        else:
            res_value = 1
            prob = 1
        ret_data = {"ret": res_value, "p": str(prob)}
        return TSD_OP_SUCCESS, ret_data

    def rate_predict(self, data):
        """
        Predict whether the latest rate value is an outlier.

        :param data: The attributes are:
                     'dataC', a piece of data to learn,
                     'dataB', a piece of data to learn,
                     'dataA', a piece of data to learn, ending with the latest value to be detected.
        :type data: Dictionary-like object
        :return: The attributes are:
                 'p', the class probability,
                 'ret', the detect result (1 denotes normal, 0 denotes abnormal).
        """
        combined_data = data["dataC"] + "," + data["dataB"] + "," + data["dataA"]
        time_series = list(map(float, combined_data.split(',')))
        statistic_result = self.statistic_obj.predict(time_series)
        if statistic_result == 0:
            prob = 0
        else:
            prob = 1
        ret_data = {"ret": statistic_result, "p": str(prob)}
        return TSD_OP_SUCCESS, ret_data

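A rough driver for the class above (illustrative values; assumes a trained model file exists under time_series_detector/model/, since value_predict falls back to xgb_default_model when no taskId is given):

    detect_obj = Detect()
    data = {
        "window": 2,               # dataC/dataB carry 2*window+1 points, dataA carries window+1
        "dataC": "10,12,11,13,12",
        "dataB": "11,13,12,14,13",
        "dataA": "12,14,95",       # the last value is the one being judged
    }
    ret_code, ret_data = detect_obj.value_predict(data)
    # on success ret_code == TSD_OP_SUCCESS and ret_data looks like
    # {"ret": 0, "p": "0.83"} (values here are illustrative)
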
@ -0,0 +1,211 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
"""

import numpy as np
import tsfresh.feature_extraction.feature_calculators as ts_feature_calculators
from time_series_detector.common.tsd_common import DEFAULT_WINDOW, split_time_series
from statistical_features import time_series_mean, time_series_variance, time_series_standard_deviation, time_series_median


def time_series_autocorrelation(x):
    """
    Calculates the autocorrelation of the specified lag, according to the formula [1]

    .. math::

        \\frac{1}{(n-l)\\sigma^{2}} \\sum_{t=1}^{n-l}(X_{t}-\\mu)(X_{t+l}-\\mu)

    where :math:`n` is the length of the time series :math:`X_i`, :math:`\\sigma^2` its variance and :math:`\\mu` its
    mean. :math:`l` denotes the lag; here it is fixed to (len(x) - 3) / 5, i.e. one window of the standard input.

    .. rubric:: References

    [1] https://en.wikipedia.org/wiki/Autocorrelation#Estimation

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :return type: float
    """
    lag = int((len(x) - 3) / 5)
    if np.sqrt(np.var(x)) < 1e-10:
        return 0
    return ts_feature_calculators.autocorrelation(x, lag)


def time_series_coefficient_of_variation(x):
    """
    Calculates the coefficient of variation, mean value / square root of variance

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :return type: float
    """
    if np.sqrt(np.var(x)) < 1e-10:
        return 0
    return np.mean(x) / np.sqrt(np.var(x))


def time_series_binned_entropy(x):
    """
    First bins the values of x into max_bins equidistant bins.
    Then calculates the value of

    .. math::

        - \\sum_{k=0}^{min(max\\_bins, len(x))} p_k log(p_k) \\cdot \\mathbf{1}_{(p_k > 0)}

    where :math:`p_k` is the percentage of samples in bin :math:`k`.
    The entropy is evaluated for every bin count in [2, 4, 6, 8, 10, 20].

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the values of this feature, one per bin count
    :return type: list
    """
    max_bins = [2, 4, 6, 8, 10, 20]
    result = []
    for value in max_bins:
        result.append(ts_feature_calculators.binned_entropy(x, value))
    return result


def time_series_value_distribution(x):
    """
    Given buckets, calculate the percentage of elements of the whole time series
    falling into each bucket

    :param x: normalized time series
    :type x: pandas.Series
    :return: the values of this feature
    :return type: list
    """
    # the final edge 1.0 is duplicated so that values exactly equal to 1.0 get their own bucket
    thresholds = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1.0, 1.0]
    return list(np.histogram(x, bins=thresholds)[0] / float(len(x)))


def time_series_daily_parts_value_distribution(x):
    """
    Given buckets, calculate the percentage of elements of three subsequences
    of the whole time series falling into each bucket

    :param x: normalized time series
    :type x: pandas.Series
    :return: the values of this feature
    :return type: list
    """
    thresholds = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1.0, 1.0]
    split_value_list = split_time_series(x, DEFAULT_WINDOW)
    data_c = split_value_list[0] + split_value_list[1][1:]
    data_b = split_value_list[2] + split_value_list[3][1:]
    data_a = split_value_list[4]
    count_c = list(np.histogram(data_c, bins=thresholds)[0])
    count_b = list(np.histogram(data_b, bins=thresholds)[0])
    count_a = list(np.histogram(data_a, bins=thresholds)[0])
    return list(np.array(count_c) / float(len(data_c))) + list(np.array(count_b) / float(len(data_b))) + list(np.array(count_a) / float(len(data_a)))


def time_series_daily_parts_value_distribution_with_threshold(x):
    """
    Split the whole time series into three parts: c, b, a.
    Given a threshold = 0.01, return the percentage of elements of the time series
    which are less than the threshold

    :param x: normalized time series
    :type x: pandas.Series
    :return: 6 values of this feature
    :return type: list
    """
    threshold = 0.01
    split_value_list = split_time_series(x, DEFAULT_WINDOW)
    data_c = split_value_list[0] + split_value_list[1][1:]
    data_b = split_value_list[2] + split_value_list[3][1:]
    data_a = split_value_list[4]

    # mark the elements of each part which are less than the threshold:
    nparray_data_c_threshold = np.array(data_c)
    nparray_data_c_threshold[nparray_data_c_threshold < threshold] = -1
    nparray_data_b_threshold = np.array(data_b)
    nparray_data_b_threshold[nparray_data_b_threshold < threshold] = -1
    nparray_data_a_threshold = np.array(data_a)
    nparray_data_a_threshold[nparray_data_a_threshold < threshold] = -1

    # the total number of elements in the time series which are less than the threshold:
    nparray_threshold_count = (nparray_data_c_threshold == -1).sum() + (nparray_data_b_threshold == -1).sum() + (nparray_data_a_threshold == -1).sum()

    if nparray_threshold_count == 0:
        features = [0, 0, 0]
    else:
        features = [
            (nparray_data_c_threshold == -1).sum() / float(nparray_threshold_count),
            (nparray_data_b_threshold == -1).sum() / float(nparray_threshold_count),
            (nparray_data_a_threshold == -1).sum() / float(nparray_threshold_count)
        ]

    features.extend([
        (nparray_data_c_threshold == -1).sum() / float(len(data_c)),
        (nparray_data_b_threshold == -1).sum() / float(len(data_b)),
        (nparray_data_a_threshold == -1).sum() / float(len(data_a))
    ])
    return features


def time_series_window_parts_value_distribution_with_threshold(x):
    """
    Split the whole time series into five parts.
    Given a threshold = 0.01, return the percentage of elements of the time series
    which are less than the threshold

    :param x: normalized time series
    :type x: pandas.Series
    :return: 5 values of this feature
    :return type: list
    """
    threshold = 0.01
    split_value_list = split_time_series(x, DEFAULT_WINDOW)

    count_list = []
    for value_list in split_value_list:
        nparray_threshold = np.array(value_list)
        nparray_threshold[nparray_threshold < threshold] = -1
        count_list.append((nparray_threshold == -1).sum())

    if sum(count_list) == 0:
        features = [0, 0, 0, 0, 0]
    else:
        features = list(np.array(count_list) / float(DEFAULT_WINDOW + 1))

    return features


# add your own classification features here...


def get_classification_features(x):
    classification_features = [
        time_series_mean(x),
        time_series_variance(x),
        time_series_standard_deviation(x),
        time_series_median(x),
        time_series_autocorrelation(x),
        time_series_coefficient_of_variation(x)
    ]
    classification_features.extend(time_series_value_distribution(x))
    classification_features.extend(time_series_daily_parts_value_distribution(x))
    classification_features.extend(time_series_daily_parts_value_distribution_with_threshold(x))
    classification_features.extend(time_series_window_parts_value_distribution_with_threshold(x))
    classification_features.extend(time_series_binned_entropy(x))
    # add your own classification features here...

    return classification_features

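Because several of these features re-split their input with DEFAULT_WINDOW, get_classification_features expects the full max-min normalized series of 5 * DEFAULT_WINDOW + 3 points. A minimal sketch with synthetic input (illustrative only):

    import numpy as np

    # 5 * 180 + 3 == 903 points in [0, 1], as normalize_time_series_by_max_min produces upstream
    x = list(np.random.rand(903))
    features = get_classification_features(x)
    # a flat list: six scalar features, then the value-distribution, daily-parts,
    # threshold and binned-entropy blocks appended in that order
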
@ -11,7 +11,7 @@ Unless required by applicable law or agreed to in writing, software distributed
import statistical_features
import classification_features
import fitting_features
from app.utils import utils
from time_series_detector.common import tsd_common


def extract_features(time_series, window):

@ -25,18 +25,19 @@ def extract_features(time_series, window):
    :return: the value of features
    :return type: list with float
    """
    if not utils.is_standard_time_series(time_series, window):
    if not tsd_common.is_standard_time_series(time_series, window):
        # add your report of this error here...

        return []

    # split time_series
    split_time_series = utils.split_time_series(time_series, window)
    split_time_series = tsd_common.split_time_series(time_series, window)
    # normalize time_series
    normalized_split_time_series = utils.normalize_time_series(split_time_series)
    normalized_split_time_series = tsd_common.normalize_time_series(split_time_series)
    max_min_normalized_time_series = tsd_common.normalize_time_series_by_max_min(split_time_series)
    s_features = statistical_features.get_statistical_features(normalized_split_time_series[4])
    f_features = fitting_features.get_fitting_features(normalized_split_time_series)
    c_features = classification_features.get_classification_features(normalized_split_time_series[0] + normalized_split_time_series[1][1:] + normalized_split_time_series[2] + normalized_split_time_series[3][1:] + normalized_split_time_series[4])
    c_features = classification_features.get_classification_features(max_min_normalized_time_series)
    # combine features with types
    features = s_features + f_features + c_features
    return features

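After this hunk, the classification features are computed on the max-min normalized series rather than the stitched mean-normalized one. A call sketch (illustrative; with the default window of 180 the input must hold 5 * 180 + 3 == 903 points):

    window = 180
    # time_series is the concatenation dataC + dataB + dataA,
    # i.e. (2*window+1) + (2*window+1) + (window+1) == 5*window + 3 points
    features = extract_features(time_series, window)
    # features is the flat list s_features + f_features + c_features
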
@ -9,7 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed
"""

import numpy as np
from app.config.common import *
from time_series_detector.common.tsd_common import *


def time_series_moving_average(x):

@ -49,7 +49,7 @@ def time_series_weighted_moving_average(x):
    for w in range(1, min(50, DEFAULT_WINDOW), 5):
        w = min(len(x), w)  # avoid the case len(value_list) < w
        coefficient = np.array(range(1, w + 1))
        temp_list.append((np.dot(coefficient, x[-w:])) / (w * (w + 1) / 2))
        temp_list.append((np.dot(coefficient, x[-w:])) / float(w * (w + 1) / 2))
    return list(np.array(temp_list) - x[-1])

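The float() cast is the substance of this hunk: on Python 2, np.dot over integer data yields an integer, and int / int floors, so the weighted average silently lost its fractional part. A minimal illustration:

    w = 3
    # coefficient == [1, 2, 3]; np.dot(coefficient, [2, 2, 3]) == 15, divisor == 6
    15 / (w * (w + 1) / 2)         # Python 2: 2 (floored)
    15 / float(w * (w + 1) / 2)    # 2.5 on both Python 2 and 3
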
@ -210,6 +210,11 @@ def time_series_periodic_features(data_c_left, data_c_right, data_b_left, data_b
        periodic_features.append(-1)
    else:
        periodic_features.append(1)

    step = DEFAULT_WINDOW // 6  # integer step so range() accepts it on Python 3
    for w in range(1, DEFAULT_WINDOW, step):
        periodic_features.append(min(max(data_a[w - 1:w + step]) - data_a[-1], 0))
        periodic_features.append(max(min(data_a[w - 1:w + step]) - data_a[-1], 0))
    return periodic_features

# add your own fitting features here...

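The added lines carve data_a into six window/6-sized chunks and, for each chunk, record how far the latest point data_a[-1] pokes above the chunk's max (as a negative value) or below its min (as a positive value), clipped to 0 when it stays inside the envelope. A toy illustration:

    # suppose a chunk spans [9, 14] and the latest value is 20
    min(14 - 20, 0)  # -> -6, the point exceeds the chunk max by 6
    max(9 - 20, 0)   # -> 0, it does not fall below the chunk min
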
@ -0,0 +1,79 @@
/*
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/

#ifndef _DETECT_H
#define _DETECT_H

#include <inttypes.h>

#ifdef __cplusplus
extern "C"{
#endif

typedef struct {
    int* data_a;
    int* data_b;
    int* data_c;
    int len_a;
    int len_b;
    int len_c;
} ValueData;

typedef struct {
    double* data_a;
    double* data_b;
    double* data_c;
    int len_a;
    int len_b;
    int len_c;
} RateData;

enum TSD_ERR_CODE
{
    TSD_SUCCESS = 0,
    TSD_INVALID_HANDLER = -1,
    TSD_CHECK_PARAM_FAILED = -2,
    TSD_TIMESERIES_INIT_ERROR = -3
};

enum TSD_SAMPLE_RESULT
{
    TSD_NEGATIVE = 0,
    TSD_POSITIVE = 1
};

/*!
 * \brief Load the xgb model from an xgb file.
 * \param fname xgb file path and name
 * \return handle on success, NULL when a failure happens
 */
void * load_model(const char *fname);

/*!
 * \brief Predict if the latest value is an outlier or not.
 * \param mhandle the handle of the xgb model
 * \param data the input data
 * \param sample_result the detect result (1 denotes normal, 0 denotes abnormal)
 * \param prob the class probability
 * \return 0 on success, <0 when a failure happens
 */
int value_predict(void * mhandle, ValueData* data, int* sample_result, float* prob);

/*!
 * \brief Predict if the latest rate value is an outlier or not.
 * \param data the input data
 * \param sample_result the detect result (1 denotes normal, 0 denotes abnormal)
 * \param prob the class probability
 * \return 0 on success, <0 when a failure happens
 */
int rate_predict(RateData* data, int* sample_result, float* prob);

#ifdef __cplusplus
}
#endif

#endif

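A hedged ctypes sketch of driving this C API from Python (the shared-library name libdetect.so and the model path are assumptions; the struct layout mirrors ValueData above):

    import ctypes

    class ValueData(ctypes.Structure):
        _fields_ = [("data_a", ctypes.POINTER(ctypes.c_int)),
                    ("data_b", ctypes.POINTER(ctypes.c_int)),
                    ("data_c", ctypes.POINTER(ctypes.c_int)),
                    ("len_a", ctypes.c_int),
                    ("len_b", ctypes.c_int),
                    ("len_c", ctypes.c_int)]

    lib = ctypes.CDLL("./libdetect.so")  # assumed build artifact name
    lib.load_model.restype = ctypes.c_void_p
    lib.load_model.argtypes = [ctypes.c_char_p]
    lib.value_predict.argtypes = [ctypes.c_void_p, ctypes.POINTER(ValueData),
                                  ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_float)]

    handle = lib.load_model(b"./model/xgb_default_model")  # path assumed
    a = (ctypes.c_int * 3)(12, 14, 95)
    b = (ctypes.c_int * 5)(11, 13, 12, 14, 13)
    c = (ctypes.c_int * 5)(10, 12, 11, 13, 12)
    int_ptr = ctypes.POINTER(ctypes.c_int)
    data = ValueData(ctypes.cast(a, int_ptr), ctypes.cast(b, int_ptr),
                     ctypes.cast(c, int_ptr), 3, 5, 5)

    result = ctypes.c_int()
    prob = ctypes.c_float()
    ret = lib.value_predict(handle, ctypes.byref(data), ctypes.byref(result), ctypes.byref(prob))
    # ret == TSD_SUCCESS (0) on success; result holds 1 (normal) or 0 (abnormal)
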
Binary file not shown.
Binary file not shown.