Compare commits


15 Commits

Author SHA1 Message Date
lxd1190 bf50841faf
Merge pull request #60 from ct-git/master
Added a reminder about a minor port issue
2019-04-18 11:44:06 +08:00
ct-git 0a49a9b1ee
Added a reminder about a minor port issue.
Note: Metis depends on ports 80 and 8080. Tencent Cloud servers open port 80 by default but do not open external access to port 8080; you need to manually allow port 8080 in the security group.
2019-04-18 11:14:29 +08:00
ct-git 8364184c46
Update install.md
Note: Metis depends on ports 80 and 8080. Tencent Cloud servers open port 80 by default but do not open external access to port 8080; you need to manually allow port 8080 in the security group.
2019-04-18 11:03:26 +08:00
ct-git 8cbdc6dc93
Update install.md
Add a note
Metis depends on ports 80 and 8080. Tencent Cloud servers open port 80 by default but do not open external access to port 8080; you need to manually allow port 8080 in the security group.
2019-04-18 11:00:29 +08:00
lxd1190 5490207e81 remove iforest 2019-03-22 15:24:27 +08:00
lxd1190 1a8eab9ec5 fix(tsd_common): fix normalize problem 2018-11-22 19:59:38 +08:00
lxd1190 e21a00bcd4 feat(so): add detect so 2018-11-21 17:17:01 +08:00
lxd1190 6e8344de95 feat(feature): add detect features 2018-11-21 17:12:41 +08:00
lxd1190 6945e32cc5 docs(api): add so api desc 2018-11-20 10:32:16 +08:00
lxd1190 024957e32d docs(api): add so api desc 2018-11-20 10:23:02 +08:00
lxd1190 3c46aa7282 style(detect): add code annotation 2018-11-09 15:55:49 +08:00
lxd1190 a9348864e0 docs(install): update install and api docs 2018-11-08 20:22:11 +08:00
lxd1190 eb9752200e Merge branch 'dev' of https://github.com/Tencent/Metis into dev 2018-11-08 19:35:08 +08:00
lxd1190 7c42a3ccae docs(api): add python api description 2018-11-08 19:34:12 +08:00
test 13802fd1f9 docs(changlog): add changlog 2018-11-08 16:50:18 +08:00
17 changed files with 727 additions and 55 deletions

View File

@ -120,7 +120,7 @@ class DetectService(object):
return True
def __check_param(self, data):
if ("viewName" not in data.keys()) or ("attrId" not in data.keys()) or ("attrName" not in data.keys()) or ("time" not in data.keys()) or ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()):
if ("viewName" not in data.keys()) or ("viewId" not in data.keys()) or ("attrId" not in data.keys()) or ("attrName" not in data.keys()) or ("time" not in data.keys()) or ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()):
return CHECK_PARAM_FAILED, "missing parameter"
return OP_SUCCESS, ""
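As the parameter list grows, the chained `not in` checks are easy to desynchronize from the docs; as a hypothetical refactor (not part of this diff), the same check can be expressed with a set difference so the required keys live in one place:
```
# Hypothetical refactor, not part of this diff: same check via set difference.
CHECK_PARAM_FAILED, OP_SUCCESS = 1, 0   # placeholders for the module constants

REQUIRED_KEYS = {"viewId", "viewName", "attrId", "attrName",
                 "time", "dataC", "dataB", "dataA"}

def check_param(data):
    if REQUIRED_KEYS - set(data.keys()):  # any required key missing?
        return CHECK_PARAM_FAILED, "missing parameter"
    return OP_SUCCESS, ""
```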

View File

@ -4,16 +4,16 @@ SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
DROP TABLE IF EXISTS `anomaly`;
CREATE TABLE `anomaly` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`view_id` varchar(31) DEFAULT NULL,
`view_name` varchar(63) DEFAULT NULL,
`attr_id` varchar(31) DEFAULT NULL,
`attr_name` varchar(63) DEFAULT NULL,
`time` datetime DEFAULT NULL,
`data_c` text NOT NULL,
`data_b` text NOT NULL,
`data_a` text NOT NULL,
`mark_flag` int(1) NOT NULL DEFAULT '0',
`id` int(10) NOT NULL AUTO_INCREMENT,
`view_id` varchar(31) NOT NULL DEFAULT '' comment 'metric set (view) id',
`view_name` varchar(63) NOT NULL DEFAULT '' comment 'metric set (view) name',
`attr_id` varchar(31) NOT NULL DEFAULT '' comment 'metric (attr) id',
`attr_name` varchar(63) NOT NULL DEFAULT '' comment 'metric (attr) name',
`time` datetime DEFAULT NULL comment 'data timestamp',
`data_c` text,
`data_b` text,
`data_a` text,
`mark_flag` tinyint(1) NOT NULL DEFAULT 0 comment '0: unlabeled, 1: labeled positive, 2: labeled negative',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
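For illustration only, a sketch of how an operator's label could be written to `mark_flag` (table and column names come from the DDL above; the MySQLdb driver, connection parameters, and the anomaly id are assumptions):
```
# Sketch only: label anomaly id 42 as a positive sample.
import MySQLdb  # assumed driver; connection details are placeholders

conn = MySQLdb.connect(host="127.0.0.1", user="metis", passwd="metis",
                       db="metis", charset="utf8")
cur = conn.cursor()
# mark_flag: 0 = unlabeled, 1 = positive, 2 = negative (per column comment)
cur.execute("UPDATE anomaly SET mark_flag = 1 WHERE id = %s", (42,))
conn.commit()
conn.close()
```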

View File

@ -4,21 +4,21 @@ SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
DROP TABLE IF EXISTS `sample_dataset`;
CREATE TABLE `sample_dataset` (
`id` bigint(10) NOT NULL AUTO_INCREMENT,
`update_time` timestamp NULL DEFAULT NULL,
`view_id` varchar(31) DEFAULT NULL,
`view_name` varchar(63) DEFAULT NULL,
`attr_name` varchar(63) DEFAULT NULL,
`attr_id` varchar(31) DEFAULT NULL,
`source` varchar(31) DEFAULT NULL,
`train_or_test` varchar(31) DEFAULT NULL,
`positive_or_negative` varchar(31) DEFAULT NULL,
`window` int(2) DEFAULT NULL,
`data_time` int(11) DEFAULT NULL,
`id` int(10) NOT NULL AUTO_INCREMENT,
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment 'sample update time',
`view_id` varchar(31) NOT NULL DEFAULT '' comment 'metric set (view) id',
`view_name` varchar(63) NOT NULL DEFAULT '' comment 'metric set (view) name',
`attr_name` varchar(63) NOT NULL DEFAULT '' comment 'metric (attr) name',
`attr_id` varchar(31) NOT NULL DEFAULT '' comment 'metric (attr) id',
`source` varchar(31) NOT NULL DEFAULT '' comment 'sample source',
`train_or_test` varchar(10) NOT NULL DEFAULT '' comment 'test: test sample, train: training sample',
`positive_or_negative` varchar(20) NOT NULL DEFAULT '' comment 'positive: positive sample, negative: negative sample',
`window` int(10) NOT NULL DEFAULT 0 comment 'window size; currently only 180 is supported',
`data_time` int(10) DEFAULT NULL comment 'sample data time',
`data_c` text,
`data_b` text,
`data_a` text,
`anomaly_id` bigint(10) DEFAULT NULL,
`anomaly_id` int(10) DEFAULT NULL comment 'identifies a sample inserted from the anomaly table',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

View File

@ -4,19 +4,18 @@ SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
DROP TABLE IF EXISTS `train_task`;
CREATE TABLE `train_task` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`task_id` char(255) DEFAULT NULL,
`sample_num` int(11) DEFAULT NULL,
`postive_sample_num` int(11) DEFAULT NULL,
`negative_sample_num` int(11) DEFAULT NULL,
`window` int(2) DEFAULT NULL,
`model_name` varchar(20) DEFAULT NULL,
`source` varchar(255) DEFAULT NULL,
`start_time` timestamp NULL DEFAULT NULL,
`end_time` timestamp NULL DEFAULT NULL,
`status` varchar(11) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `id` (`id`)
`id` int(10) NOT NULL AUTO_INCREMENT,
`task_id` varchar(20) NOT NULL DEFAULT '' comment 'training task id',
`sample_num` int(10) NOT NULL DEFAULT 0 comment 'total number of training samples',
`postive_sample_num` int(10) NOT NULL DEFAULT 0 comment 'number of positive training samples',
`negative_sample_num` int(10) NOT NULL DEFAULT 0 comment 'number of negative training samples',
`window` int(10) NOT NULL DEFAULT 0 comment 'window size; currently only 180 is supported',
`model_name` varchar(20) NOT NULL DEFAULT '' comment 'model name',
`source` varchar(255) NOT NULL DEFAULT '' comment 'sample source',
`start_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment 'training task start time',
`end_time` timestamp NULL DEFAULT NULL comment 'training task end time',
`status` varchar(11) NOT NULL DEFAULT '' comment 'complete: finished, running: in progress, failed: failed',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
-- ----------------------------
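Likewise, a hypothetical poll of `train_task.status` to wait for a training job to end (status values follow the column comment; the task id and connection details are placeholders):
```
# Sketch only: wait until a training task leaves the 'running' state.
import time
import MySQLdb  # assumed driver; connection details are placeholders

conn = MySQLdb.connect(host="127.0.0.1", user="metis", passwd="metis",
                       db="metis", charset="utf8")
cur = conn.cursor()
while True:
    cur.execute("SELECT status FROM train_task WHERE task_id = %s",
                ("task-0001",))  # placeholder task id
    row = cur.fetchone()
    if row and row[0] in ("complete", "failed"):
        print(row[0])
        break
    time.sleep(10)
```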

changeLog.md (new file, 26 lines)
View File

@ -0,0 +1,26 @@
# ChangeLog
## [2018-11-07, Version v0.2.0](https://github.com/Tencent/Metis/releases/tag/v0.2.0)
### Bug fixes
- [[```48618f59```](https://github.com/Tencent/Metis/commit/48618f59cb70249cba2d01d5413cbb4eea418721)] __-__ __docker__: start.sh dos2unix (lxd1190)
- [[```86076d84```](https://github.com/Tencent/Metis/commit/86076d843fab582c46728a7916a81aef7f1b78c3)] __-__ __docker__: update docker image (lxd1190)
### Code Refactoring
- [[```21b2b161```](https://github.com/Tencent/Metis/commit/21b2b1614d91eec1fc2fb07c6628f6a3868523e7)] __-__ __app__: refactor dictionary: add app module (lxd1190)
- [[```5faf04cf```](https://github.com/Tencent/Metis/commit/5faf04cf06643f7c9f3833daba7c81a31c028eef)] __-__ __app__: refactor dictionary (lxd1190)
### Other commits
- [[```e3167d25```](https://github.com/Tencent/Metis/commit/e3167d25c92cb9c852cdd5100de61c30f62ce9d5)] __-__ __docs(architecture docs)__ : update docs content (lxd1190)
- [[```86569e65```](https://github.com/Tencent/Metis/commit/86569e65bc4f5717fdd35c7511347f6e129f109d)] __-__ __docs(code_framework)__ : update arch picture and code description (lxd1190)
- [[```c739f92c```](https://github.com/Tencent/Metis/commit/c739f92ca6def3e37c75641c0bf22e41eb4e3c11)] __-__ __docs(install)__ : update db config path (lxd1190)

View File

@ -1,20 +1,22 @@
# API Documentation
## Time Series Anomaly Detection Learnware API
## Time Series Anomaly Detection API
Users can call the API to run anomaly detection on time series, and view and manage the detection results through the web console. The server provides two anomaly detection APIs for different scenarios.
Users can choose an API to run anomaly detection on time series according to their scenario.
1. Value detection: suitable for most types of data; uses combined unsupervised and supervised detection and loads a detection model.
1. Value detection: suitable for most KPI metric data; uses combined unsupervised and supervised detection and loads a detection model.
2. Rate detection: suitable for normally distributed data; uses an unsupervised algorithm, e.g. for life-or-death metrics such as success rate.
- For API calls, use the address of the backend service you deployed
- For HTTP API calls, use the address of the backend service you deployed; the Python API can be called directly
- The current detection window is 3 hours, with one data point per minute, i.e. a window size of 180
- The reference dates and time spans can be adjusted as needed; in this document the two reference series are taken from the same time of day yesterday and one week ago
Detecting the current value relies on three past segments of data; the selection rule is shown in the example figure and in the sketch below:
![data_info](images/data_info.png)
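As a concrete illustration of this selection rule, a sketch with a hypothetical minute-level list `series`, whose last element is the point under detection:
```
# Sketch: assemble dataA/dataB/dataC for the latest point of a toy
# minute-level series covering the last 8 days.
WINDOW = 180                      # 3 hours at one point per minute
DAY = 24 * 60                     # points per day

series = [10] * (8 * DAY)         # hypothetical minute-level values
now = len(series) - 1             # index of the point to detect

a = series[now - WINDOW : now + 1]                  # 181 points
b_mid = now - DAY                                   # yesterday, same time
b = series[b_mid - WINDOW : b_mid + WINDOW + 1]     # 361 points
c_mid = now - 7 * DAY                               # one week ago
c = series[c_mid - WINDOW : c_mid + WINDOW + 1]     # 361 points

data = {
    "window": WINDOW,
    "dataA": ",".join(map(str, a)),
    "dataB": ",".join(map(str, b)),
    "dataC": ",".join(map(str, c)),
}
```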
### 1. Value detection
### I. HTTP API
#### 1. Value detection
* API POST /{ip}:{port}/PredictValue
* Function: detect whether the latest data point is anomalous based on the reference data
@ -73,7 +75,7 @@
| ret | int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| p | string | Probability value; the smaller the value, the higher the confidence that the point is anomalous. Currently p < 0.15 is judged anomalous |
### 2. Rate detection
#### 2. Rate detection
* API POST /{ip}:{port}/PredictRate
* Function: detect whether the latest data point is anomalous based on the reference data
@ -129,4 +131,404 @@
| code | int | Return code. 0: success, non-zero: failure |
| msg | string | Return message |
| ret | int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| p | string | Probability value; the smaller the value, the higher the confidence that the point is anomalous |
| p | string | Probability value; the smaller the value, the higher the confidence that the point is anomalous |
### II. Python API
The time_series_detector directory in the Metis project is the time series anomaly detection learnware; it can be called directly from Python code:
#### 1. Value detection
* Function: detect whether the latest data point is anomalous based on the reference data
* How to call:
```
# Python
from time_series_detector import detect
detect_obj = detect.Detect()
detect_obj.value_predict(data)
```
* Input parameter (Python dictionary):
```
{
"window":180,
"dataC":"9,10,152,...,255,...,16",
"dataB":"9,10,152,...,255,...,18",
"dataA":"9,10,152,...,458"
}
```
* Input parameter description:
| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| taskId | string | No | None | Detection model to use; if not provided, the system default model is used |
| window | int | No | None | Window size; currently only 180 is supported |
| dataC | string | Yes | None | The point at the same time one week ago, plus 180 points on each side; 361 points joined in chronological order, comma-separated |
| dataB | string | Yes | None | The point at the same time yesterday, plus 180 points on each side; 361 points joined in chronological order, comma-separated |
| dataA | string | Yes | None | The point under detection plus the preceding 180 points; 181 points joined in chronological order, comma-separated |
* Return values:
```
code, {
"ret":0,
"p":"0.05",
}
```
* Return value description:
| Name | Type | Description |
|---|---|---|
| code | int | Return code. 0: success, non-zero: failure |
| ret | int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| p | string | Probability value; the smaller the value, the higher the confidence that the point is anomalous. Currently p < 0.15 is judged anomalous |
* Example (see also the textual sketch below):
![data_info](images/python_api_value_predict.png)
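For reference alongside the screenshot, an equivalent textual call, as a sketch (toy payloads; real calls need meaningful 361/361/181-point series):
```
# Sketch: value detection with toy data.
from time_series_detector import detect

detect_obj = detect.Detect()
data = {
    "window": 180,
    "dataC": ",".join(["10"] * 361),
    "dataB": ",".join(["10"] * 361),
    "dataA": ",".join(["10"] * 180 + ["55"]),  # last point to detect
}
code, result = detect_obj.value_predict(data)
if code == 0:
    print(result)  # e.g. {"ret": 0, "p": "0.05"}
```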
#### 2. Rate detection
* Function: detect whether the latest data point is anomalous based on the reference data
* How to call:
```
# Python
from time_series_detector import detect
detect_obj = detect.Detect()
detect_obj.rate_predict(data)
```
* Input parameter (Python dictionary):
```
{
"dataC":"9,10,152,...,255,...,16",
"dataB":"9,10,152,...,255,...,18",
"dataA":"9,10,152,...,458"
}
```
* Input parameter description:
| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| dataC | string | Yes | None | The point at the same time one week ago, plus 180 points on each side; 361 points joined in chronological order, comma-separated |
| dataB | string | Yes | None | The point at the same time yesterday, plus 180 points on each side; 361 points joined in chronological order, comma-separated |
| dataA | string | Yes | None | The point under detection plus the preceding 180 points; 181 points joined in chronological order, comma-separated |
* Return values:
```
code, {
"ret":0,
"p":"0",
}
```
* Return value description:
| Name | Type | Description |
|---|---|---|
| code | int | Return code. 0: success, non-zero: failure |
| ret | int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| p | string | Probability value; the smaller the value, the higher the confidence that the point is anomalous. Currently p < 0.15 is judged anomalous |
* Example (see also the textual sketch below):
![data_info](images/python_api_rate_predict.png)
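The corresponding textual sketch for rate detection, again with toy values:
```
# Sketch: rate detection with toy data.
from time_series_detector import detect

detect_obj = detect.Detect()
data = {
    "dataC": ",".join(["0.99"] * 361),
    "dataB": ",".join(["0.99"] * 361),
    "dataA": ",".join(["0.99"] * 180 + ["0.42"]),  # last point to detect
}
code, result = detect_obj.rate_predict(data)
if code == 0:
    print(result)
```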
### III. LIB (shared library)
The time_series_detector/lib directory in the Metis project holds the learnware shared library; the library file can be loaded and called from your code.
libdetect.so currently supports CentOS 7.2+ environments.
#### Calling from Python:
##### 1. Value detection
* Function: detect whether the latest data point is anomalous based on the reference data
* How to call:
Load the .so library:
```
# Python
from ctypes import cdll

so = cdll.LoadLibrary
metis_lib = so("./libdetect.so")
handle = metis_lib.load_model("./xgb_default_model")
```
Construct the input data:
```
# Python
from ctypes import *
class ValueData(Structure):
_fields_ = [('data_a', POINTER(c_int)), ('data_b', POINTER(c_int)), ('data_c', POINTER(c_int)),
('len_a', c_int), ('len_b', c_int), ('len_c', c_int)]
# test data
data_c = [1] * 361
data_b = [1] * 361
data_a = [1] * 180
data_a.append(10)
paarray = (c_int * len(data_a))(*data_a)
pbarray = (c_int * len(data_b))(*data_b)
pcarray = (c_int * len(data_c))(*data_c)
data_value = ValueData(paarray, pbarray, pcarray, len(data_a), len(data_b), len(data_c))
```
Call the prediction function:
```
# Python
result = c_int()
prob = c_float()
ret_code = metis_lib.value_predict(handle, byref(data_value), byref(result), byref(prob))
if ret_code != 0:
print "value_predict error code = %d" % ret_code
print result, prob
```
* Input parameter (C struct):
```
typedef struct {
int* data_a;
int* data_b;
int* data_c;
int len_a;
int len_b;
int len_c;
} ValueData;
```
* Input parameter description:
| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| handle | int | Yes | None | Model handle, returned by load_model |
| data_value | ValueData | Yes | None | Data to be detected |
* Return values:
```
ret_code
result
prob
```
* Return value description:
| Name | Type | Description |
|---|---|---|
| ret_code | int | Return code. 0: success, non-zero: failure |
| result | c_int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| prob | c_float | Probability value; the smaller the value, the higher the confidence that the point is anomalous. Currently prob < 0.15 is judged anomalous |
##### 2. Rate detection
* Function: detect whether the latest data point is anomalous based on the reference data
* How to call:
Load the .so library:
```
# Python
from ctypes import cdll

so = cdll.LoadLibrary
metis_lib = so("./libdetect.so")
```
Construct the input data:
```
# Python
from ctypes import *
class RateData(Structure):
_fields_ = [('data_a', POINTER(c_double)), ('data_b', POINTER(c_double)), ('data_c', POINTER(c_double)),
('len_a', c_int), ('len_b', c_int), ('len_c', c_int)]
# test data
data_c = [1.0] * 361
data_b = [1.0] * 361
data_a = [1.0] * 180
data_a.append(0.9)
paarray = (c_double * len(data_a))(*data_a)
pbarray = (c_double * len(data_b))(*data_b)
pcarray = (c_double * len(data_c))(*data_c)
data_value = RateData(paarray, pbarray, pcarray, len(data_a), len(data_b), len(data_c))
```
Call the prediction function:
```
# Python
result = c_int()
prob = c_float()
ret_code = metis_lib.rate_predict(byref(data_value), byref(result), byref(prob))
if ret_code != 0:
print "value_predict error code = %d" % ret_code
print result, prob
```
* Input parameter (C struct):
```
typedef struct {
double* data_a;
double* data_b;
double* data_c;
int len_a;
int len_b;
int len_c;
} RateData;
```
* Input parameter description:
| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| data_value | RateData | Yes | None | Data to be detected |
* Return values:
```
ret_code
result
prob
```
* Return value description:
| Name | Type | Description |
|---|---|---|
| ret_code | int | Return code. 0: success, non-zero: failure |
| result | c_int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| prob | c_float | Probability value; the smaller the value, the higher the confidence that the point is anomalous |
#### Calling from C:
To call the detection functions from C, include the header file detect.h and link against libdetect.so at compile time.
##### 1. Value detection
* Function: detect whether the latest data point is anomalous based on the reference data
* How to call:
Call load_model to load the model, then call value_predict to predict:
```
#include <stdio.h>
#include "detect.h"

void *handle;
ValueData value_data;   /* assumed to be filled with the input series */
int sample_result;
float prob;

if (NULL == (handle = load_model("./xgb_default_model")))
{
    printf("load model error\n");
    return 0;
}
int ret = value_predict(handle, &value_data, &sample_result, &prob);
printf("ret=%d result = %d prob = %f\n", ret, sample_result, prob);
```
* Input parameter (C struct):
```
typedef struct {
int* data_a;
int* data_b;
int* data_c;
int len_a;
int len_b;
int len_c;
} ValueData;
```
* Input parameter description:
| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| handle | void * | Yes | None | Model handle, returned by load_model |
| value_data | ValueData | Yes | None | Data to be detected |
* Return values:
```
ret
sample_result
prob
```
* Return value description:
| Name | Type | Description |
|---|---|---|
| ret | int | Return code. 0: success, non-zero: failure |
| sample_result | int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| prob | float | Probability value; the smaller the value, the higher the confidence that the point is anomalous. Currently prob < 0.15 is judged anomalous |
##### 2. Rate detection
* Function: detect whether the latest data point is anomalous based on the reference data
* How to call:
```
#include <stdio.h>
#include "detect.h"

RateData rate_data;     /* assumed to be filled with the input series */
float prob;
int sample_result;

int ret = rate_predict(&rate_data, &sample_result, &prob);
printf("ret=%d result = %d prob = %f\n", ret, sample_result, prob);
```
* Input parameter (C struct):
```
typedef struct {
double* data_a;
double* data_b;
double* data_c;
int len_a;
int len_b;
int len_c;
} RateData;
```
* Input parameter description:
| Name | Type | Required | Default | Description |
| --- | --- | --- | --- | --- |
| rate_data | RateData | Yes | None | Data to be detected |
* Return values:
```
ret
sample_result
prob
```
* Return value description:
| Name | Type | Description |
|---|---|---|
| ret | int | Return code. 0: success, non-zero: failure |
| sample_result | int | Whether the detection result is anomalous. 0: anomalous, 1: normal |
| prob | float | Probability value; the smaller the value, the higher the confidence that the point is anomalous |

Binary file not shown (new image, 6.3 KiB).

Binary file not shown (new image, 6.9 KiB).

View File

@ -111,7 +111,7 @@ export PYTHONPATH=/data/Metis:$PYTHONPATH
### 1.3.4. Start the server
Start the server program
Start the server program (replace ip with the server's real IP address)
```
python /data/Metis/app/controller/manage.py runserver {ip}:{port}
@ -175,6 +175,8 @@ After nginx starts normally, open a browser and visit `http://${ip}:80/`
npm run build: after development is complete, run this command to bundle the project code. A dist directory is generated in the project root; then copy the custom directory into dist. To publish, place all files in the dist directory as static files in the server's designated static file directory
After installation, refer to the API documentation to make API calls
# 2. <a id="chapter-5"></a>Docker installation and deployment
## 2.1. Install Docker
@ -195,4 +197,7 @@ docker ps
```
Check the status of the three containers (metis-db, metis-web, metis-svr); if they all started normally, the installation succeeded.
![docker_ps](images/docker_ps.png)
If the installation succeeded, you can access `http://${IP}` directly in a browser
If the installation succeeded, you can access `http://${IP}` directly in a browser
Note: Metis depends on ports 80 and 8080. Tencent Cloud servers open port 80 by default but do not open external access to port 8080; you need to manually allow port 8080 in the security group (a quick check is sketched below).
Refer to the API documentation to make API calls
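A quick, hypothetical reachability check, run from a machine outside the security group (the IP is a placeholder):
```
# Sketch: verify ports 80 and 8080 are reachable from outside.
import socket

SERVER_IP = "203.0.113.10"  # placeholder; use your server's public IP
for port in (80, 8080):
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(3)
    status = "open" if s.connect_ex((SERVER_IP, port)) == 0 else "blocked"
    s.close()
    print("port %d: %s" % (port, status))
```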

View File

@ -78,3 +78,20 @@ def normalize_time_series(split_time_series):
normalized_data_a
]
return normalized_split_time_series
def normalize_time_series_by_max_min(split_time_series):
"""
Normalize the split_time_series by max_min_normalization.
:param split_time_series: [[data_c_left], [data_c_right], [data_b_left], [data_b_right], [data_a]]
:return: max_min_normalized time_series
"""
time_series = split_time_series[0] + split_time_series[1][1:] + split_time_series[2] + split_time_series[3][1:] + split_time_series[4]
max_value = np.max(time_series)
min_value = np.min(time_series)
normalized_time_series = [0.0]*len(time_series)
if max_value - min_value > 0:
normalized_time_series = list((np.array(time_series) - min_value) / float(max_value - min_value))
return normalized_time_series
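A small worked example of the max-min normalization above (hand-picked values; the split layout mirrors the docstring, with shared boundary points dropped via `[1:]`):
```
# Sketch: normalize_time_series_by_max_min on a toy split series.
import numpy as np

split = [[0, 2], [2, 4], [4, 6], [6, 8], [8, 10]]
ts = split[0] + split[1][1:] + split[2] + split[3][1:] + split[4]
# ts == [0, 2, 4, 4, 6, 8, 8, 10]; min = 0, max = 10
print(list((np.array(ts) - 0) / float(10 - 0)))
# -> [0.0, 0.2, 0.4, 0.4, 0.6, 0.8, 0.8, 1.0]
```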

View File

@ -33,7 +33,7 @@ class Detect(object):
return True
def __check_param(self, data):
if ("viewName" not in data.keys()) or ("attrId" not in data.keys()) or ("attrName" not in data.keys()) or ("time" not in data.keys()) or ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()):
if ("dataC" not in data.keys()) or ("dataB" not in data.keys()) or ("dataA" not in data.keys()):
return TSD_CHECK_PARAM_FAILED, "missing parameter"
if not data['dataA']:
return TSD_CHECK_PARAM_FAILED, "dataA can not be empty"
@ -61,9 +61,18 @@ class Detect(object):
def value_predict(self, data):
"""
Predict the data
Predict if the latest value is an outlier or not.
:param data: the time series to detect of
:param data: The attributes are:
'window', the length of window,
'taskId', the id of detect model,
'dataC', a piece of data to learn,
'dataB', a piece of data to learn,
'dataA', a piece of data to learn and the latest value to be detected.
:type data: Dictionary-like object
:return: The attributes are:
'p', the class probability,
'ret', the result of detection (1 denotes normal, 0 denotes abnormal).
"""
ret_code, ret_data = self.__check_param(data)
if ret_code != TSD_OP_SUCCESS:
@ -81,8 +90,7 @@ class Detect(object):
statistic_result = self.statistic_obj.predict(time_series)
ewma_result = self.ewma_obj.predict(time_series)
polynomial_result = self.polynomial_obj.predict(time_series, window)
iforest_result = self.iforest_obj.predict(time_series, window)
if statistic_result == 0 or ewma_result == 0 or polynomial_result == 0 or iforest_result == 0:
if statistic_result == 0 or ewma_result == 0 or polynomial_result == 0:
xgb_result = self.supervised_obj.predict(time_series, window, model_name)
res_value = xgb_result[0]
prob = xgb_result[1]
@ -94,9 +102,16 @@ class Detect(object):
def rate_predict(self, data):
"""
Predict the data
Predict if the latest value is an outlier or not.
:param data: the time series to detect of
:param data: The attributes are:
'dataC', a piece of data to learn,
'dataB', a piece of data to learn,
'dataA', a piece of data to learn and the latest value to be detected.
:type data: Dictionary-like object
:return: The attributes are:
'p', the class probability,
'ret', the result of detection (1 denotes normal, 0 denotes abnormal).
"""
combined_data = data["dataC"] + "," + data["dataB"] + "," + data["dataA"]
time_series = map(float, combined_data.split(','))

View File

@ -10,6 +10,8 @@ Unless required by applicable law or agreed to in writing, software distributed
import numpy as np
import tsfresh.feature_extraction.feature_calculators as ts_feature_calculators
from time_series_detector.common.tsd_common import DEFAULT_WINDOW, split_time_series
from statistical_features import time_series_mean, time_series_variance, time_series_standard_deviation, time_series_median
def time_series_autocorrelation(x):
@ -35,6 +37,8 @@ def time_series_autocorrelation(x):
:return type: float
"""
lag = int((len(x) - 3) / 5)
if np.sqrt(np.var(x)) < 1e-10:
return 0
return ts_feature_calculators.autocorrelation(x, lag)
@ -47,6 +51,8 @@ def time_series_coefficient_of_variation(x):
:return: the value of this feature
:return type: float
"""
if np.sqrt(np.var(x)) < 1e-10:
return 0
return np.mean(x) / np.sqrt(np.var(x))
@ -74,15 +80,132 @@ def time_series_binned_entropy(x):
result.append(ts_feature_calculators.binned_entropy(x, value))
return result
def time_series_value_distribution(x):
"""
Given buckets, calculate the percentage of elements in the whole time series
in different buckets
:param x: normalized time series
:type x: pandas.Series
:return: the values of this feature
:return type: list
"""
thresholds = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1.0, 1.0]
return list(np.histogram(x, bins=thresholds)[0] / float(len(x)))
def time_series_daily_parts_value_distribution(x):
"""
Given buckets, calculate the percentage of elements in three subsequences
of the whole time series in different buckets
:param x: normalized time series
:type x: pandas.Series
:return: the values of this feature
:return type: list
"""
thresholds = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1.0, 1.0]
split_value_list = split_time_series(x, DEFAULT_WINDOW)
data_c = split_value_list[0] + split_value_list[1][1:]
data_b = split_value_list[2] + split_value_list[3][1:]
data_a = split_value_list[4]
count_c = list(np.histogram(data_c, bins=thresholds)[0])
count_b = list(np.histogram(data_b, bins=thresholds)[0])
count_a = list(np.histogram(data_a, bins=thresholds)[0])
return list(np.array(count_c) / float(len(data_c))) + list(np.array(count_b) / float(len(data_b))) + list(np.array(count_a) / float(len(data_a)))
def time_series_daily_parts_value_distribution_with_threshold(x):
"""
Split the whole time series into three parts: c, b, a.
Given a threshold = 0.01, return the percentage of elements of time series
which are less than threshold
:param x: normalized time series
:type x: pandas.Series
:return: 6 values of this feature
:return type: list
"""
threshold = 0.01
split_value_list = split_time_series(x, DEFAULT_WINDOW)
data_c = split_value_list[0] + split_value_list[1][1:]
data_b = split_value_list[2] + split_value_list[3][1:]
data_a = split_value_list[4]
# mark elements of each time series which are less than threshold as -1:
nparray_data_c_threshold = np.array(data_c)
nparray_data_c_threshold[nparray_data_c_threshold < threshold] = -1
nparray_data_b_threshold = np.array(data_b)
nparray_data_b_threshold[nparray_data_b_threshold < threshold] = -1
nparray_data_a_threshold = np.array(data_a)
nparray_data_a_threshold[nparray_data_a_threshold < threshold] = -1
# the total number of elements in time series which is less than threshold:
nparray_threshold_count = (nparray_data_c_threshold == -1).sum() + (nparray_data_b_threshold == -1).sum() + (nparray_data_a_threshold == -1).sum()
if nparray_threshold_count == 0:
features = [0, 0, 0]
else:
features = [
(nparray_data_c_threshold == -1).sum() / float(nparray_threshold_count),
(nparray_data_b_threshold == -1).sum() / float(nparray_threshold_count),
(nparray_data_a_threshold == -1).sum() / float(nparray_threshold_count)
]
features.extend([
(nparray_data_c_threshold == -1).sum() / float(len(data_c)),
(nparray_data_b_threshold == -1).sum() / float(len(data_b)),
(nparray_data_a_threshold == -1).sum() / float(len(data_a))
])
return features
def time_series_window_parts_value_distribution_with_threshold(x):
"""
Split the whole time series into five parts.
Given a threshold = 0.01, return the percentage of elements of time series
which are less than threshold
:param x: normalized time series
:type x: pandas.Series
:return: 5 values of this feature
:return type: list
"""
threshold = 0.01
split_value_list = split_time_series(x, DEFAULT_WINDOW)
count_list = []
for value_list in split_value_list:
nparray_threshold = np.array(value_list)
nparray_threshold[nparray_threshold < threshold] = -1
count_list.append((nparray_threshold == -1).sum())
if sum(count_list) == 0:
features = [0, 0, 0, 0, 0]
else:
features = list(np.array(count_list) / float((DEFAULT_WINDOW + 1)))
return features
# add your own classification features here...
def get_classification_features(x):
classification_features = [
time_series_mean(x),
time_series_variance(x),
time_series_standard_deviation(x),
time_series_median(x),
time_series_autocorrelation(x),
time_series_coefficient_of_variation(x)
]
classification_features.extend(time_series_value_distribution(x))
classification_features.extend(time_series_daily_parts_value_distribution(x))
classification_features.extend(time_series_daily_parts_value_distribution_with_threshold(x))
classification_features.extend(time_series_window_parts_value_distribution_with_threshold(x))
classification_features.extend(time_series_binned_entropy(x))
# append your own classification features here...
# add your own classification features here...
return classification_features
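To see the combined feature vector end to end, a hedged sketch (assuming DEFAULT_WINDOW is 180, so the normalized input is 361 + 361 + 181 = 903 points, and assuming the module lives at time_series_detector.feature.classification_features):
```
# Sketch: build the classification feature vector for toy data.
import numpy as np
from time_series_detector.feature import classification_features

# 903 points = dataC (361) + dataB (361) + dataA (181), already
# normalized to [0, 1] as extract_features does upstream.
x = list(np.linspace(0, 1, 903))
features = classification_features.get_classification_features(x)
print(len(features))  # fixed-length vector fed to the classifier
```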

View File

@ -34,9 +34,10 @@ def extract_features(time_series, window):
split_time_series = tsd_common.split_time_series(time_series, window)
# normalize time_series
normalized_split_time_series = tsd_common.normalize_time_series(split_time_series)
max_min_normalized_time_series = tsd_common.normalize_time_series_by_max_min(split_time_series)
s_features = statistical_features.get_statistical_features(normalized_split_time_series[4])
f_features = fitting_features.get_fitting_features(normalized_split_time_series)
c_features = classification_features.get_classification_features(normalized_split_time_series[0] + normalized_split_time_series[1][1:] + normalized_split_time_series[2] + normalized_split_time_series[3][1:] + normalized_split_time_series[4])
c_features = classification_features.get_classification_features(max_min_normalized_time_series)
# combine features with types
features = s_features + f_features + c_features
return features

View File

@ -49,7 +49,7 @@ def time_series_weighted_moving_average(x):
for w in range(1, min(50, DEFAULT_WINDOW), 5):
w = min(len(x), w) # avoid the case len(value_list) < w
coefficient = np.array(range(1, w + 1))
temp_list.append((np.dot(coefficient, x[-w:])) / (w * (w + 1) / 2))
temp_list.append((np.dot(coefficient, x[-w:])) / float(w * (w + 1) / 2))
return list(np.array(temp_list) - x[-1])
@ -210,6 +210,11 @@ def time_series_periodic_features(data_c_left, data_c_right, data_b_left, data_b
periodic_features.append(-1)
else:
periodic_features.append(1)
step = DEFAULT_WINDOW / 6
for w in range(1, DEFAULT_WINDOW, step):
periodic_features.append(min(max(data_a[w - 1:w + step]) - data_a[-1], 0))
periodic_features.append(max(min(data_a[w - 1:w + step]) - data_a[-1], 0))
return periodic_features
# add your own fitting features here...
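The loop added above splits `data_a` into six window-sixths (step 30 for a window of 180) and records, per slice, how much the latest point overshoots the slice maximum (as a non-positive value) and undershoots the slice minimum (as a non-negative value); a standalone sketch of the same computation:
```
# Sketch: the per-slice deviation features added in this diff.
DEFAULT_WINDOW = 180
step = DEFAULT_WINDOW // 6            # 30 points per slice

data_a = list(range(181))             # toy series; latest point is 180
features = []
for w in range(1, DEFAULT_WINDOW, step):
    chunk = data_a[w - 1 : w + step]  # 31-point slice
    features.append(min(max(chunk) - data_a[-1], 0))  # <= 0: latest above slice max
    features.append(max(min(chunk) - data_a[-1], 0))  # >= 0: latest below slice min
print(features)  # [-150, 0, -120, 0, -90, 0, -60, 0, -30, 0, 0, 0]
```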

View File

@ -0,0 +1,79 @@
/*
Tencent is pleased to support the open source community by making Metis available.
Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://opensource.org/licenses/BSD-3-Clause
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _DETECT_H
#define _DETECT_H
#include <inttypes.h>
#ifdef __cplusplus
extern "C"{
#endif
typedef struct {
int* data_a;
int* data_b;
int* data_c;
int len_a;
int len_b;
int len_c;
} ValueData;
typedef struct {
double* data_a;
double* data_b;
double* data_c;
int len_a;
int len_b;
int len_c;
} RateData;
enum TSD_ERR_CODE
{
TSD_SUCCESS = 0,
TSD_INVALID_HANDLER = -1,
TSD_CHECK_PARAM_FAILED = -2,
TSD_TIMESERIES_INIT_ERROR = -3
};
enum TSD_SAMPLE_RESULT
{
TSD_NEGATIVE = 0,
TSD_POSITIVE = 1
};
/*!
* \brief Load an xgb model from an xgb file
* \param fname xgb file path and name
* \return handle when success, NULL when failure happens
*/
void * load_model(const char *fname);
/*!
* \brief Predict if the latest value is an outlier or not.
* \param mhandle the handle of the xgb model
* \param data the input data
* \param sample_result the detection result (1 denotes normal, 0 denotes abnormal)
* \param prob the output anomaly probability
* \return 0 when success, <0 when failure happens
*/
int value_predict(void * mhandle, ValueData* data, int* sample_result, float* prob);
/*!
* \brief Predict if the latest value is an outlier or not.
* \param data the input data
* \param sample_result the detection result (1 denotes normal, 0 denotes abnormal)
* \param prob the output anomaly probability
* \return 0 when success, <0 when failure happens
*/
int rate_predict(RateData* data, int* sample_result, float* prob);
#ifdef __cplusplus
}
#endif
#endif

Binary file not shown.