Compare commits


15 Commits

Author SHA1 Message Date
lxd1190 bf50841faf
Merge pull request #60 from ct-git/master
Added a reminder about a minor port issue
2019-04-18 11:44:06 +08:00
ct-git 0a49a9b1ee
Added a reminder about a minor port issue.
Note: Metis depends on ports 80 and 8080. Tencent Cloud servers open port 80 by default but do not open external access to port 8080; you need to manually allow port 8080 in the security group.
2019-04-18 11:14:29 +08:00
ct-git 8364184c46
Update install.md
Note: Metis depends on ports 80 and 8080. Tencent Cloud servers open port 80 by default but do not open external access to port 8080; you need to manually allow port 8080 in the security group.
2019-04-18 11:03:26 +08:00
ct-git 8cbdc6dc93
Update install.md
Add a note
Metis depends on ports 80 and 8080. Tencent Cloud servers open port 80 by default but do not open external access to port 8080; you need to manually allow port 8080 in the security group.
2019-04-18 11:00:29 +08:00
lxd1190 5490207e81 remove iforest 2019-03-22 15:24:27 +08:00
lxd1190 1a8eab9ec5 fix(tsd_common): fix normalize problem 2018-11-22 19:59:38 +08:00
lxd1190 e21a00bcd4 feat(so): add detect so 2018-11-21 17:17:01 +08:00
lxd1190 6e8344de95 feat(feature): add detect features 2018-11-21 17:12:41 +08:00
lxd1190 6945e32cc5 docs(api): add so api desc 2018-11-20 10:32:16 +08:00
lxd1190 024957e32d docs(api): add so api desc 2018-11-20 10:23:02 +08:00
lxd1190 3c46aa7282 style(detect): add code annotation 2018-11-09 15:55:49 +08:00
lxd1190 a9348864e0 docs(install): update install and api docs 2018-11-08 20:22:11 +08:00
lxd1190 eb9752200e Merge branch 'dev' of https://github.com/Tencent/Metis into dev 2018-11-08 19:35:08 +08:00
lxd1190 7c42a3ccae docs(api): add python api description 2018-11-08 19:34:12 +08:00
test 13802fd1f9 docs(changlog): add changlog 2018-11-08 16:50:18 +08:00
17 changed files with 727 additions and 55 deletions

View File

@ -120,7 +120,7 @@ class DetectService(object):
return True
def __check_param(self, data):
if ("viewName" not in data.keys()) or ("attrId" not in data.keys()) or ("attrName" not in data.keys()) or ("time" not in data.keys()) or ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()):
if ("viewName" not in data.keys()) or ("viewId" not in data.keys()) or ("attrId" not in data.keys()) or ("attrName" not in data.keys()) or ("time" not in data.keys()) or ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()):
return CHECK_PARAM_FAILED, "missing parameter"
return OP_SUCCESS, ""
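As the parameter list grows, the chained `not in` checks are easy to desynchronize from the docs; as a hypothetical refactor (not part of this diff), the same check can be expressed with a set difference so the required keys live in one place:
```
# Hypothetical refactor, not part of this diff: same check via set difference.
CHECK_PARAM_FAILED, OP_SUCCESS = 1, 0   # placeholders for the module constants

REQUIRED_KEYS = {"viewId", "viewName", "attrId", "attrName",
                 "time", "dataC", "dataB", "dataA"}

def check_param(data):
    if REQUIRED_KEYS - set(data.keys()):  # any required key missing?
        return CHECK_PARAM_FAILED, "missing parameter"
    return OP_SUCCESS, ""
```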

View File

@ -4,16 +4,16 @@ SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
DROP TABLE IF EXISTS `anomaly`;
CREATE TABLE `anomaly` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`view_id` varchar(31) DEFAULT NULL,
`view_name` varchar(63) DEFAULT NULL,
`attr_id` varchar(31) DEFAULT NULL,
`attr_name` varchar(63) DEFAULT NULL,
`time` datetime DEFAULT NULL,
`data_c` text NOT NULL,
`data_b` text NOT NULL,
`data_a` text NOT NULL,
`mark_flag` int(1) NOT NULL DEFAULT '0',
`id` int(10) NOT NULL AUTO_INCREMENT,
`view_id` varchar(31) NOT NULL DEFAULT '' comment 'metric set (view) id',
`view_name` varchar(63) NOT NULL DEFAULT '' comment 'metric set (view) name',
`attr_id` varchar(31) NOT NULL DEFAULT '' comment 'metric (attr) id',
`attr_name` varchar(63) NOT NULL DEFAULT '' comment 'metric (attr) name',
`time` datetime DEFAULT NULL comment 'data timestamp',
`data_c` text,
`data_b` text,
`data_a` text,
`mark_flag` tinyint(1) NOT NULL DEFAULT 0 comment '0: unlabeled, 1: labeled positive, 2: labeled negative',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
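For illustration only, a sketch of how an operator's label could be written to `mark_flag` (table and column names come from the DDL above; the MySQLdb driver, connection parameters, and the anomaly id are assumptions):
```
# Sketch only: label anomaly id 42 as a positive sample.
import MySQLdb  # assumed driver; connection details are placeholders

conn = MySQLdb.connect(host="127.0.0.1", user="metis", passwd="metis",
                       db="metis", charset="utf8")
cur = conn.cursor()
# mark_flag: 0 = unlabeled, 1 = positive, 2 = negative (per column comment)
cur.execute("UPDATE anomaly SET mark_flag = 1 WHERE id = %s", (42,))
conn.commit()
conn.close()
```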

View File

@ -4,21 +4,21 @@ SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
DROP TABLE IF EXISTS `sample_dataset`;
CREATE TABLE `sample_dataset` (
`id` bigint(10) NOT NULL AUTO_INCREMENT,
`update_time` timestamp NULL DEFAULT NULL,
`view_id` varchar(31) DEFAULT NULL,
`view_name` varchar(63) DEFAULT NULL,
`attr_name` varchar(63) DEFAULT NULL,
`attr_id` varchar(31) DEFAULT NULL,
`source` varchar(31) DEFAULT NULL,
`train_or_test` varchar(31) DEFAULT NULL,
`positive_or_negative` varchar(31) DEFAULT NULL,
`window` int(2) DEFAULT NULL,
`data_time` int(11) DEFAULT NULL,
`id` int(10) NOT NULL AUTO_INCREMENT,
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment 'sample update time',
`view_id` varchar(31) NOT NULL DEFAULT '' comment 'metric set (view) id',
`view_name` varchar(63) NOT NULL DEFAULT '' comment 'metric set (view) name',
`attr_name` varchar(63) NOT NULL DEFAULT '' comment 'metric (attr) name',
`attr_id` varchar(31) NOT NULL DEFAULT '' comment 'metric (attr) id',
`source` varchar(31) NOT NULL DEFAULT '' comment 'sample source',
`train_or_test` varchar(10) NOT NULL DEFAULT '' comment 'test: test sample, train: training sample',
`positive_or_negative` varchar(20) NOT NULL DEFAULT '' comment 'positive: positive sample, negative: negative sample',
`window` int(10) NOT NULL DEFAULT 0 comment 'window size; currently only 180 is supported',
`data_time` int(10) DEFAULT NULL comment 'sample data time',
`data_c` text,
`data_b` text,
`data_a` text,
`anomaly_id` bigint(10) DEFAULT NULL,
`anomaly_id` int(10) DEFAULT NULL comment 'identifies a sample inserted from the anomaly table',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

View File

@ -4,19 +4,18 @@ SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
DROP TABLE IF EXISTS `train_task`;
CREATE TABLE `train_task` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`task_id` char(255) DEFAULT NULL,
`sample_num` int(11) DEFAULT NULL,
`postive_sample_num` int(11) DEFAULT NULL,
`negative_sample_num` int(11) DEFAULT NULL,
`window` int(2) DEFAULT NULL,
`model_name` varchar(20) DEFAULT NULL,
`source` varchar(255) DEFAULT NULL,
`start_time` timestamp NULL DEFAULT NULL,
`end_time` timestamp NULL DEFAULT NULL,
`status` varchar(11) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `id` (`id`)
`id` int(10) NOT NULL AUTO_INCREMENT,
`task_id` varchar(20) NOT NULL DEFAULT '' comment 'training task id',
`sample_num` int(10) NOT NULL DEFAULT 0 comment 'total number of training samples',
`postive_sample_num` int(10) NOT NULL DEFAULT 0 comment 'number of positive training samples',
`negative_sample_num` int(10) NOT NULL DEFAULT 0 comment 'number of negative training samples',
`window` int(10) NOT NULL DEFAULT 0 comment 'window size; currently only 180 is supported',
`model_name` varchar(20) NOT NULL DEFAULT '' comment 'model name',
`source` varchar(255) NOT NULL DEFAULT '' comment 'sample source',
`start_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment 'training task start time',
`end_time` timestamp NULL DEFAULT NULL comment 'training task end time',
`status` varchar(11) NOT NULL DEFAULT '' comment 'complete: finished, running: in progress, failed: failed',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
-- ----------------------------
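Likewise, a hypothetical poll of `train_task.status` to wait for a training job to end (status values follow the column comment; the task id and connection details are placeholders):
```
# Sketch only: wait until a training task leaves the 'running' state.
import time
import MySQLdb  # assumed driver; connection details are placeholders

conn = MySQLdb.connect(host="127.0.0.1", user="metis", passwd="metis",
                       db="metis", charset="utf8")
cur = conn.cursor()
while True:
    cur.execute("SELECT status FROM train_task WHERE task_id = %s",
                ("task-0001",))  # placeholder task id
    row = cur.fetchone()
    if row and row[0] in ("complete", "failed"):
        print(row[0])
        break
    time.sleep(10)
```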

changeLog.md (new file, 26 lines)
View File

@ -0,0 +1,26 @@
# ChangeLog
## [2018-11-07, Version v0.2.0](https://github.com/Tencent/Metis/releases/tag/v0.2.0)
### Bug fixes
- [[```48618f59```](https://github.com/Tencent/Metis/commit/48618f59cb70249cba2d01d5413cbb4eea418721)] __-__ __docker__: start.sh dos2unix (lxd1190)
- [[```86076d84```](https://github.com/Tencent/Metis/commit/86076d843fab582c46728a7916a81aef7f1b78c3)] __-__ __docker__: update docker image (lxd1190)
### Code Refactoring
- [[```21b2b161```](https://github.com/Tencent/Metis/commit/21b2b1614d91eec1fc2fb07c6628f6a3868523e7)] __-__ __app__: refactor dictionary: add app module (lxd1190)
- [[```5faf04cf```](https://github.com/Tencent/Metis/commit/5faf04cf06643f7c9f3833daba7c81a31c028eef)] __-__ __app__: refactor dictionary (lxd1190)
### Other commits
- [[```e3167d25```](https://github.com/Tencent/Metis/commit/e3167d25c92cb9c852cdd5100de61c30f62ce9d5)] __-__ __docs(architecture docs)__ : update docs content (lxd1190)
- [[```86569e65```](https://github.com/Tencent/Metis/commit/86569e65bc4f5717fdd35c7511347f6e129f109d)] __-__ __docs(code_framework)__ : update arch picture and code description (lxd1190)
- [[```c739f92c```](https://github.com/Tencent/Metis/commit/c739f92ca6def3e37c75641c0bf22e41eb4e3c11)] __-__ __docs(install)__ : update db config path (lxd1190)

View File

@ -1,20 +1,22 @@
# API Documentation
## Time Series Anomaly Detection Learnware API
## Time Series Anomaly Detection API
Users can call the API to run anomaly detection on time series, and view and manage the detection results through the web console. The server provides two anomaly detection APIs for different scenarios.
Users can choose an API to run anomaly detection on time series according to their scenario.
1. Value detection: suitable for most types of data; uses combined unsupervised and supervised detection and loads a detection model.
1. Value detection: suitable for most KPI metric data; uses combined unsupervised and supervised detection and loads a detection model.
2. Rate detection: suitable for normally distributed data; uses an unsupervised algorithm, e.g. for life-or-death metrics such as success rate.
- For API calls, use the address of the backend service you deployed
- For HTTP API calls, use the address of the backend service you deployed; the Python API can be called directly
- The current detection window is 3 hours, with one data point per minute, i.e. a window size of 180
- The reference dates and time spans can be adjusted as needed; in this document the two reference series are taken from the same time of day yesterday and one week ago
Detecting the current value relies on three past segments of data; the selection rule is shown in the example figure and in the sketch below:
![data_info](images/data_info.png)
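As a concrete illustration of this selection rule, a sketch with a hypothetical minute-level list `series`, whose last element is the point under detection:
```
# Sketch: assemble dataA/dataB/dataC for the latest point of a toy
# minute-level series covering the last 8 days.
WINDOW = 180                      # 3 hours at one point per minute
DAY = 24 * 60                     # points per day

series = [10] * (8 * DAY)         # hypothetical minute-level values
now = len(series) - 1             # index of the point to detect

a = series[now - WINDOW : now + 1]                  # 181 points
b_mid = now - DAY                                   # yesterday, same time
b = series[b_mid - WINDOW : b_mid + WINDOW + 1]     # 361 points
c_mid = now - 7 * DAY                               # one week ago
c = series[c_mid - WINDOW : c_mid + WINDOW + 1]     # 361 points

data = {
    "window": WINDOW,
    "dataA": ",".join(map(str, a)),
    "dataB": ",".join(map(str, b)),
    "dataC": ",".join(map(str, c)),
}
```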
### 1. Value detection
### I. HTTP API
#### 1. Value detection
* API POST /{ip}:{port}/PredictValue
* Function: detect whether the latest data point is anomalous based on the reference data
@ -73,7 +75,7 @@
| ret | int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| p | string | Probability value; the smaller the value, the higher the confidence that the point is anomalous. Currently p < 0.15 is judged anomalous |
### 2. Rate detection
#### 2. Rate detection
* API POST /{ip}:{port}/PredictRate
* Function: detect whether the latest data point is anomalous based on the reference data
@ -129,4 +131,404 @@
| code | int | Return code. 0: success, non-zero: failure |
| msg | string | Return message |
| ret | int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| p | string | Probability value; the smaller the value, the higher the confidence that the point is anomalous |
| p | string | Probability value; the smaller the value, the higher the confidence that the point is anomalous |
### II. Python API
The time_series_detector directory in the Metis project is the time series anomaly detection learnware; it can be called directly from Python code:
#### 1. Value detection
* Function: detect whether the latest data point is anomalous based on the reference data
* How to call:
```
# Python
from time_series_detector import detect
detect_obj = detect.Detect()
detect_obj.value_predict(data)
```
* Input parameter (Python dictionary):
```
{
"window":180,
"dataC":"9,10,152,...,255,...,16",
"dataB":"9,10,152,...,255,...,18",
"dataA":"9,10,152,...,458"
}
```
* Input parameter description:
| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| taskId | string | No | None | Detection model to use; if not provided, the system default model is used |
| window | int | No | None | Window size; currently only 180 is supported |
| dataC | string | Yes | None | The point at the same time one week ago, plus 180 points on each side; 361 points joined in chronological order, comma-separated |
| dataB | string | Yes | None | The point at the same time yesterday, plus 180 points on each side; 361 points joined in chronological order, comma-separated |
| dataA | string | Yes | None | The point under detection plus the preceding 180 points; 181 points joined in chronological order, comma-separated |
* Return values:
```
code, {
"ret":0,
"p":"0.05",
}
```
* Return value description:
| Name | Type | Description |
|---|---|---|
| code | int | Return code. 0: success, non-zero: failure |
| ret | int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| p | string | Probability value; the smaller the value, the higher the confidence that the point is anomalous. Currently p < 0.15 is judged anomalous |
* Example (see also the textual sketch below):
![data_info](images/python_api_value_predict.png)
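For reference alongside the screenshot, an equivalent textual call, as a sketch (toy payloads; real calls need meaningful 361/361/181-point series):
```
# Sketch: value detection with toy data.
from time_series_detector import detect

detect_obj = detect.Detect()
data = {
    "window": 180,
    "dataC": ",".join(["10"] * 361),
    "dataB": ",".join(["10"] * 361),
    "dataA": ",".join(["10"] * 180 + ["55"]),  # last point to detect
}
code, result = detect_obj.value_predict(data)
if code == 0:
    print(result)  # e.g. {"ret": 0, "p": "0.05"}
```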
#### 2. Rate detection
* Function: detect whether the latest data point is anomalous based on the reference data
* How to call:
```
# Python
from time_series_detector import detect
detect_obj = detect.Detect()
detect_obj.rate_predict(data)
```
* Input parameter (Python dictionary):
```
{
"dataC":"9,10,152,...,255,...,16",
"dataB":"9,10,152,...,255,...,18",
"dataA":"9,10,152,...,458"
}
```
* Input parameter description:
| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| dataC | string | Yes | None | The point at the same time one week ago, plus 180 points on each side; 361 points joined in chronological order, comma-separated |
| dataB | string | Yes | None | The point at the same time yesterday, plus 180 points on each side; 361 points joined in chronological order, comma-separated |
| dataA | string | Yes | None | The point under detection plus the preceding 180 points; 181 points joined in chronological order, comma-separated |
* Return values:
```
code, {
"ret":0,
"p":"0",
}
```
* Return value description:
| Name | Type | Description |
|---|---|---|
| code | int | Return code. 0: success, non-zero: failure |
| ret | int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| p | string | Probability value; the smaller the value, the higher the confidence that the point is anomalous. Currently p < 0.15 is judged anomalous |
* Example (see also the textual sketch below):
![data_info](images/python_api_rate_predict.png)
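The corresponding textual sketch for rate detection, again with toy values:
```
# Sketch: rate detection with toy data.
from time_series_detector import detect

detect_obj = detect.Detect()
data = {
    "dataC": ",".join(["0.99"] * 361),
    "dataB": ",".join(["0.99"] * 361),
    "dataA": ",".join(["0.99"] * 180 + ["0.42"]),  # last point to detect
}
code, result = detect_obj.rate_predict(data)
if code == 0:
    print(result)
```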
### III. LIB (shared library)
The time_series_detector/lib directory in the Metis project holds the learnware shared library; the library file can be loaded and called from your code.
libdetect.so currently supports CentOS 7.2+ environments.
#### Calling from Python:
##### 1. Value detection
* Function: detect whether the latest data point is anomalous based on the reference data
* How to call:
Load the .so library:
```
# Python
from ctypes import cdll

so = cdll.LoadLibrary
metis_lib = so("./libdetect.so")
handle = metis_lib.load_model("./xgb_default_model")
```
Construct the input data:
```
# Python
from ctypes import *
class ValueData(Structure):
_fields_ = [('data_a', POINTER(c_int)), ('data_b', POINTER(c_int)), ('data_c', POINTER(c_int)),
('len_a', c_int), ('len_b', c_int), ('len_c', c_int)]
# test data
data_c = [1] * 361
data_b = [1] * 361
data_a = [1] * 180
data_a.append(10)
paarray = (c_int * len(data_a))(*data_a)
pbarray = (c_int * len(data_b))(*data_b)
pcarray = (c_int * len(data_c))(*data_c)
data_value = ValueData(paarray, pbarray, pcarray, len(data_a), len(data_b), len(data_c))
```
Call the prediction function:
```
# Python
result = c_int()
prob = c_float()
ret_code = metis_lib.value_predict(handle, byref(data_value), byref(result), byref(prob))
if ret_code != 0:
print "value_predict error code = %d" % ret_code
print result, prob
```
* Input parameter (C struct):
```
typedef struct {
int* data_a;
int* data_b;
int* data_c;
int len_a;
int len_b;
int len_c;
} ValueData;
```
* Input parameter description:
| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| handle | int | Yes | None | Model handle, returned by load_model |
| data_value | ValueData | Yes | None | Data to be detected |
* Return values:
```
ret_code
result
prob
```
* Return value description:
| Name | Type | Description |
|---|---|---|
| ret_code | int | Return code. 0: success, non-zero: failure |
| result | c_int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| prob | c_float | Probability value; the smaller the value, the higher the confidence that the point is anomalous. Currently prob < 0.15 is judged anomalous |
##### 2. Rate detection
* Function: detect whether the latest data point is anomalous based on the reference data
* How to call:
Load the .so library:
```
# Python
from ctypes import cdll

so = cdll.LoadLibrary
metis_lib = so("./libdetect.so")
```
Construct the input data:
```
# Python
from ctypes import *
class RateData(Structure):
_fields_ = [('data_a', POINTER(c_double)), ('data_b', POINTER(c_double)), ('data_c', POINTER(c_double)),
('len_a', c_int), ('len_b', c_int), ('len_c', c_int)]
# test data
data_c = [1.0] * 361
data_b = [1.0] * 361
data_a = [1.0] * 180
data_a.append(0.9)
paarray = (c_double * len(data_a))(*data_a)
pbarray = (c_double * len(data_b))(*data_b)
pcarray = (c_double * len(data_c))(*data_c)
data_value = RateData(paarray, pbarray, pcarray, len(data_a), len(data_b), len(data_c))
```
Call the prediction function:
```
# Python
result = c_int()
prob = c_float()
ret_code = metis_lib.rate_predict(byref(data_value), byref(result), byref(prob))
if ret_code != 0:
print "value_predict error code = %d" % ret_code
print result, prob
```
* Input parameter (C struct):
```
typedef struct {
double* data_a;
double* data_b;
double* data_c;
int len_a;
int len_b;
int len_c;
} RateData;
```
* Input parameter description:
| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| data_value | RateData | Yes | None | Data to be detected |
* Return values:
```
ret_code
result
prob
```
* Return value description:
| Name | Type | Description |
|---|---|---|
| ret_code | int | Return code. 0: success, non-zero: failure |
| result | c_int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| prob | c_float | Probability value; the smaller the value, the higher the confidence that the point is anomalous |
#### Calling from C:
To call the detection functions from C, include the header file detect.h and link against libdetect.so at compile time.
##### 1. Value detection
* Function: detect whether the latest data point is anomalous based on the reference data
* How to call:
Call load_model to load the model, then call value_predict to predict:
```
#include <stdio.h>
#include "detect.h"

void *handle;
ValueData value_data;   /* assumed to be filled with the input series */
int sample_result;
float prob;

if (NULL == (handle = load_model("./xgb_default_model")))
{
    printf("load model error\n");
    return 0;
}
int ret = value_predict(handle, &value_data, &sample_result, &prob);
printf("ret=%d result = %d prob = %f\n", ret, sample_result, prob);
```
* Input parameter (C struct):
```
typedef struct {
int* data_a;
int* data_b;
int* data_c;
int len_a;
int len_b;
int len_c;
} ValueData;
```
* Input parameter description:
| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| handle | void * | Yes | None | Model handle, returned by load_model |
| value_data | ValueData | Yes | None | Data to be detected |
* Return values:
```
ret
sample_result
prob
```
* Return value description:
| Name | Type | Description |
|---|---|---|
| ret | int | Return code. 0: success, non-zero: failure |
| sample_result | int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| prob | float | Probability value; the smaller the value, the higher the confidence that the point is anomalous. Currently prob < 0.15 is judged anomalous |
##### 2. Rate detection
* Function: detect whether the latest data point is anomalous based on the reference data
* How to call:
```
#include <stdio.h>
#include "detect.h"

RateData rate_data;     /* assumed to be filled with the input series */
float prob;
int sample_result;

int ret = rate_predict(&rate_data, &sample_result, &prob);
printf("ret=%d result = %d prob = %f\n", ret, sample_result, prob);
```
* Input parameter (C struct):
```
typedef struct {
double* data_a;
double* data_b;
double* data_c;
int len_a;
int len_b;
int len_c;
} RateData;
```
* Input parameter description:
| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| rate_data | RateData | Yes | None | Data to be detected |
* Return values:
```
ret
sample_result
prob
```
* Return value description:
| Name | Type | Description |
|---|---|---|
| ret | int | Return code. 0: success, non-zero: failure |
| sample_result | int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| prob | float | Probability value; the smaller the value, the higher the confidence that the point is anomalous |

Binary file not shown (new image, 6.3 KiB).

Binary file not shown (new image, 6.9 KiB).

View File

@ -111,7 +111,7 @@ export PYTHONPATH=/data/Metis:$PYTHONPATH
### 1.3.4. Start the server
Start the server program
Start the server program (replace ip with the server's real IP address)
```
python /data/Metis/app/controller/manage.py runserver {ip}:{port}
@ -175,6 +175,8 @@ After nginx starts normally, open a browser and visit `http://${ip}:80/`
npm run build: after development is complete, run this command to bundle the project code. A dist directory is generated in the project root; then copy the custom directory into dist. To publish, place all files in the dist directory as static files in the server's designated static file directory
After installation, refer to the API documentation to make API calls
# 2. <a id="chapter-5"></a>Docker installation and deployment
## 2.1. Install Docker
@ -195,4 +197,7 @@ docker ps
```
Check the status of the three containers (metis-db, metis-web, metis-svr); if they all started normally, the installation succeeded.
![docker_ps](images/docker_ps.png)
If the installation succeeded, you can access `http://${IP}` directly in a browser
If the installation succeeded, you can access `http://${IP}` directly in a browser
Note: Metis depends on ports 80 and 8080. Tencent Cloud servers open port 80 by default but do not open external access to port 8080; you need to manually allow port 8080 in the security group (a quick check is sketched below).
Refer to the API documentation to make API calls
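A quick, hypothetical reachability check, run from a machine outside the security group (the IP is a placeholder):
```
# Sketch: verify ports 80 and 8080 are reachable from outside.
import socket

SERVER_IP = "203.0.113.10"  # placeholder; use your server's public IP
for port in (80, 8080):
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(3)
    status = "open" if s.connect_ex((SERVER_IP, port)) == 0 else "blocked"
    s.close()
    print("port %d: %s" % (port, status))
```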

View File

@ -78,3 +78,20 @@ def normalize_time_series(split_time_series):
normalized_data_a
]
return normalized_split_time_series
def normalize_time_series_by_max_min(split_time_series):
"""
Normalize the split_time_series by max_min_normalization.
:param split_time_series: [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]]
:return: max_min_normalized time_series
"""
time_series = split_time_series[0] + split_time_series[1][1:] + split_time_series[2] + split_time_series[3][1:] + split_time_series[4]
max_value = np.max(time_series)
min_value = np.min(time_series)
normalized_time_series = [0.0]*len(time_series)
if max_value - min_value > 0:
normalized_time_series = list((np.array(time_series) - min_value) / float(max_value - min_value))
return normalized_time_series
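A small worked example of the max-min normalization above (hand-picked values; the split layout mirrors the docstring, with shared boundary points dropped via `[1:]`):
```
# Sketch: normalize_time_series_by_max_min on a toy split series.
import numpy as np

split = [[0, 2], [2, 4], [4, 6], [6, 8], [8, 10]]
ts = split[0] + split[1][1:] + split[2] + split[3][1:] + split[4]
# ts == [0, 2, 4, 4, 6, 8, 8, 10]; min = 0, max = 10
print(list((np.array(ts) - 0) / float(10 - 0)))
# -> [0.0, 0.2, 0.4, 0.4, 0.6, 0.8, 0.8, 1.0]
```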

View File

@ -33,7 +33,7 @@ class Detect(object):
return True
def __check_param(self, data):
if ("viewName" not in data.keys()) or ("attrId" not in data.keys()) or ("attrName" not in data.keys()) or ("time" not in data.keys()) or ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()):
if ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()):
return TSD_CHECK_PARAM_FAILED, "missing parameter"
if not data['dataA']:
return TSD_CHECK_PARAM_FAILED, "dataA can not be empty"
@ -61,9 +61,18 @@ class Detect(object):
def value_predict(self, data):
"""
Predict the data
Predict if the latest value is an outlier or not.
:param data: the time series to detect of
:param data: The attributes are:
'window', the length of window,
'taskId', the id of detect model,
'dataC', a piece of data to learn,
'dataB', a piece of data to learn,
'dataA', a piece of data to learn and the latest value to be detected.
:type data: Dictionary-like object
:return: The attributes are:
'p', the class probability,
'ret', the result of detection (1 denotes normal, 0 denotes abnormal).
"""
ret_code, ret_data = self.__check_param(data)
if ret_code != TSD_OP_SUCCESS:
@ -81,8 +90,7 @@ class Detect(object):
statistic_result = self.statistic_obj.predict(time_series)
ewma_result = self.ewma_obj.predict(time_series)
polynomial_result = self.polynomial_obj.predict(time_series, window)
iforest_result = self.iforest_obj.predict(time_series, window)
if statistic_result == 0 or ewma_result == 0 or polynomial_result == 0 or iforest_result == 0:
if statistic_result == 0 or ewma_result == 0 or polynomial_result == 0:
xgb_result = self.supervised_obj.predict(time_series, window, model_name)
res_value = xgb_result[0]
prob = xgb_result[1]
@ -94,9 +102,16 @@ class Detect(object):
def rate_predict(self, data):
"""
Predict the data
Predict if the latest value is an outlier or not.
:param data: the time series to detect of
:param data: The attributes are:
'dataC', a piece of data to learn,
'dataB', a piece of data to learn,
'dataA', a piece of data to learn and the latest value to be detected.
:type data: Dictionary-like object
:return: The attributes are:
'p', the class probability,
'ret', the result of detection (1 denotes normal, 0 denotes abnormal).
"""
combined_data = data["dataC"] + "," + data["dataB"] + "," + data["dataA"]
time_series = map(float, combined_data.split(','))

View File

@ -10,6 +10,8 @@ Unless required by applicable law or agreed to in writing, software distributed
import numpy as np
import tsfresh.feature_extraction.feature_calculators as ts_feature_calculators
from time_series_detector.common.tsd_common import DEFAULT_WINDOW, split_time_series
from statistical_features import time_series_mean, time_series_variance, time_series_standard_deviation, time_series_median
def time_series_autocorrelation(x):
@ -35,6 +37,8 @@ def time_series_autocorrelation(x):
:return type: float
"""
lag = int((len(x) - 3) / 5)
if np.sqrt(np.var(x)) < 1e-10:
return 0
return ts_feature_calculators.autocorrelation(x, lag)
@ -47,6 +51,8 @@ def time_series_coefficient_of_variation(x):
:return: the value of this feature
:return type: float
"""
if np.sqrt(np.var(x)) < 1e-10:
return 0
return np.mean(x) / np.sqrt(np.var(x))
@ -74,15 +80,132 @@ def time_series_binned_entropy(x):
result.append(ts_feature_calculators.binned_entropy(x, value))
return result
def time_series_value_distribution(x):
"""
Given buckets, calculate the percentage of elements in the whole time series
in different buckets
:param x: normalized time series
:type x: pandas.Series
:return: the values of this feature
:return type: list
"""
thresholds = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1.0, 1.0]
return list(np.histogram(x, bins=thresholds)[0] / float(len(x)))
def time_series_daily_parts_value_distribution(x):
"""
Given buckets, calculate the percentage of elements in three subsequences
of the whole time series in different buckets
:param x: normalized time series
:type x: pandas.Series
:return: the values of this feature
:return type: list
"""
thresholds = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1.0, 1.0]
split_value_list = split_time_series(x, DEFAULT_WINDOW)
data_c = split_value_list[0] + split_value_list[1][1:]
data_b = split_value_list[2] + split_value_list[3][1:]
data_a = split_value_list[4]
count_c = list(np.histogram(data_c, bins=thresholds)[0])
count_b = list(np.histogram(data_b, bins=thresholds)[0])
count_a = list(np.histogram(data_a, bins=thresholds)[0])
return list(np.array(count_c) / float(len(data_c))) + list(np.array(count_b) / float(len(data_b))) + list(np.array(count_a) / float(len(data_a)))
def time_series_daily_parts_value_distribution_with_threshold(x):
"""
Split the whole time series into three parts: c, b, a.
Given a threshold = 0.01, return the percentage of elements of time series
which are less than threshold
:param x: normalized time series
:type x: pandas.Series
:return: 6 values of this feature
:return type: list
"""
threshold = 0.01
split_value_list = split_time_series(x, DEFAULT_WINDOW)
data_c = split_value_list[0] + split_value_list[1][1:]
data_b = split_value_list[2] + split_value_list[3][1:]
data_a = split_value_list[4]
# mark elements of each time series which are less than threshold as -1:
nparray_data_c_threshold = np.array(data_c)
nparray_data_c_threshold[nparray_data_c_threshold < threshold] = -1
nparray_data_b_threshold = np.array(data_b)
nparray_data_b_threshold[nparray_data_b_threshold < threshold] = -1
nparray_data_a_threshold = np.array(data_a)
nparray_data_a_threshold[nparray_data_a_threshold < threshold] = -1
# the total number of elements in time series which is less than threshold:
nparray_threshold_count = (nparray_data_c_threshold == -1).sum() + (nparray_data_b_threshold == -1).sum() + (nparray_data_a_threshold == -1).sum()
if nparray_threshold_count == 0:
features = [0, 0, 0]
else:
features = [
(nparray_data_c_threshold == -1).sum() / float(nparray_threshold_count),
(nparray_data_b_threshold == -1).sum() / float(nparray_threshold_count),
(nparray_data_a_threshold == -1).sum() / float(nparray_threshold_count)
]
features.extend([
(nparray_data_c_threshold == -1).sum() / float(len(data_c)),
(nparray_data_b_threshold == -1).sum() / float(len(data_b)),
(nparray_data_a_threshold == -1).sum() / float(len(data_a))
])
return features
def time_series_window_parts_value_distribution_with_threshold(x):
"""
Split the whole time series into five parts.
Given a threshold = 0.01, return the percentage of elements of time series
which are less than threshold
:param x: normalized time series
:type x: pandas.Series
:return: 5 values of this feature
:return type: list
"""
threshold = 0.01
split_value_list = split_time_series(x, DEFAULT_WINDOW)
count_list = []
for value_list in split_value_list:
nparray_threshold = np.array(value_list)
nparray_threshold[nparray_threshold < threshold] = -1
count_list.append((nparray_threshold == -1).sum())
if sum(count_list) == 0:
features = [0, 0, 0, 0, 0]
else:
features = list(np.array(count_list) / float((DEFAULT_WINDOW + 1)))
return features
# add your own classification features here...
def get_classification_features(x):
classification_features = [
time_series_mean(x),
time_series_variance(x),
time_series_standard_deviation(x),
time_series_median(x),
time_series_autocorrelation(x),
time_series_coefficient_of_variation(x)
]
classification_features.extend(time_series_value_distribution(x))
classification_features.extend(time_series_daily_parts_value_distribution(x))
classification_features.extend(time_series_daily_parts_value_distribution_with_threshold(x))
classification_features.extend(time_series_window_parts_value_distribution_with_threshold(x))
classification_features.extend(time_series_binned_entropy(x))
# append your own classification features here...
# add your own classification features here...
return classification_features
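To see the combined feature vector end to end, a hedged sketch (assuming DEFAULT_WINDOW is 180, so the normalized input is 361 + 361 + 181 = 903 points, and assuming the module lives at time_series_detector.feature.classification_features):
```
# Sketch: build the classification feature vector for toy data.
import numpy as np
from time_series_detector.feature import classification_features

# 903 points = dataC (361) + dataB (361) + dataA (181), already
# normalized to [0, 1] as extract_features does upstream.
x = list(np.linspace(0, 1, 903))
features = classification_features.get_classification_features(x)
print(len(features))  # fixed-length vector fed to the classifier
```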

View File

@ -34,9 +34,10 @@ def extract_features(time_series, window):
split_time_series = tsd_common.split_time_series(time_series, window)
# normalize time_series
normalized_split_time_series = tsd_common.normalize_time_series(split_time_series)
max_min_normalized_time_series = tsd_common.normalize_time_series_by_max_min(split_time_series)
s_features = statistical_features.get_statistical_features(normalized_split_time_series[4])
f_features = fitting_features.get_fitting_features(normalized_split_time_series)
c_features = classification_features.get_classification_features(normalized_split_time_series[0] + normalized_split_time_series[1][1:] + normalized_split_time_series[2] + normalized_split_time_series[3][1:] + normalized_split_time_series[4])
c_features = classification_features.get_classification_features(max_min_normalized_time_series)
# combine features with types
features = s_features + f_features + c_features
return features

View File

@ -49,7 +49,7 @@ def time_series_weighted_moving_average(x):
for w in range(1, min(50, DEFAULT_WINDOW), 5):
w = min(len(x), w) # avoid the case len(value_list) < w
coefficient = np.array(range(1, w + 1))
temp_list.append((np.dot(coefficient, x[-w:])) / (w * (w + 1) / 2))
temp_list.append((np.dot(coefficient, x[-w:])) / float(w * (w + 1) / 2))
return list(np.array(temp_list) - x[-1])
@ -210,6 +210,11 @@ def time_series_periodic_features(data_c_left, data_c_right, data_b_left, data_b
periodic_features.append(-1)
else:
periodic_features.append(1)
step = DEFAULT_WINDOW / 6
for w in range(1, DEFAULT_WINDOW, step):
periodic_features.append(min(max(data_a[w - 1:w + step]) - data_a[-1], 0))
periodic_features.append(max(min(data_a[w - 1:w + step]) - data_a[-1], 0))
return periodic_features
# add your own fitting features here...
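The loop added above splits `data_a` into six window-sixths (step 30 for a window of 180) and records, per slice, how much the latest point overshoots the slice maximum (as a non-positive value) and undershoots the slice minimum (as a non-negative value); a standalone sketch of the same computation:
```
# Sketch: the per-slice deviation features added in this diff.
DEFAULT_WINDOW = 180
step = DEFAULT_WINDOW // 6            # 30 points per slice

data_a = list(range(181))             # toy series; latest point is 180
features = []
for w in range(1, DEFAULT_WINDOW, step):
    chunk = data_a[w - 1 : w + step]  # 31-point slice
    features.append(min(max(chunk) - data_a[-1], 0))  # <= 0: latest above slice max
    features.append(max(min(chunk) - data_a[-1], 0))  # >= 0: latest below slice min
print(features)  # [-150, 0, -120, 0, -90, 0, -60, 0, -30, 0, 0, 0]
```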

View File

@ -0,0 +1,79 @@
/*
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _DETECT_H
#define _DETECT_H
#include <inttypes.h>
#ifdef __cplusplus
extern "C"{
#endif
typedef struct {
int* data_a;
int* data_b;
int* data_c;
int len_a;
int len_b;
int len_c;
} ValueData;
typedef struct {
double* data_a;
double* data_b;
double* data_c;
int len_a;
int len_b;
int len_c;
} RateData;
enum TSD_ERR_CODE
{
TSD_SUCCESS = 0,
TSD_INVALID_HANDLER = -1,
TSD_CHECK_PARAM_FAILED = -2,
TSD_TIMESERIES_INIT_ERROR = -3
};
enum TSD_SAMPLE_RESULT
{
TSD_NEGATIVE = 0,
TSD_POSITIVE = 1
};
/*!
* \brief Load an xgb model from an xgb file
* \param fname xgb file path and name
* \return handle when success, NULL when failure happens
*/
void * load_model(const char *fname);
/*!
* \brief Predict if the latest value is an outlier or not.
* \param mhandle the handle of the xgb model
* \param data the input data
* \param sample_result the detection result (1 denotes normal, 0 denotes abnormal)
* \param prob the output anomaly probability
* \return 0 when success, <0 when failure happens
*/
int value_predict(void * mhandle, ValueData* data, int* sample_result, float* prob);
/*!
* \brief Predict if the latest value is an outlier or not.
* \param data the input data
* \param sample_result the detection result (1 denotes normal, 0 denotes abnormal)
* \param prob the output anomaly probability
* \return 0 when success, <0 when failure happens
*/
int rate_predict(RateData* data, int* sample_result, float* prob);
#ifdef __cplusplus
}
#endif
#endif

Binary file not shown.