You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

464 lines
21 KiB

#!/usr/bin/python3.6.5
# -*- coding: utf-8 -*-
# Time : 2020/3/29 23:03
# Author : HeKang
# Github : https://github.com/JustKeepSilence
import time
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.stats.distributions import chi2
import json
from json import JSONDecodeError
import sys
import traceback
import pymssql
import requests
import datetime
from scipy.stats import norm
from scipy.stats import f
from scipy.stats import chi2
import jenkspy
# import pca_train_k
import pcamtcltest_k
# import model_performance
import xlrd
import random
from pylab import mpl
import matplotlib.pyplot as plt
from collections import Counter
from operator import itemgetter
import PCA_Test
import config
from recon import Lars, recon_fault_diagnosis_r, recon_fault_diagnosis_r_l, recon_fault_diagnosis_r_c
def pca(model, LockVariable,Data_origin,number_sample, number_fault_variable,
low_f,high_f,Test_type,Limit_value, time_data, spe_recon,expand=True, sort_data=False):
if len(low_f) >= 12:
para_length = 3
elif 12 > len(low_f) >= 7:
para_length = 2
else:
para_length = 1
low_f=np.array(low_f)/np.array(model["Train_X_std"]) # 对最低幅度归一�?
high_f=np.array(high_f)/np.array(model["Train_X_std"]) # 对最高幅度归一�?
yy = list(range(0, Data_origin.shape[0]))
time_sample = []
if expand:
# random_row = np.random.choice(a=yy, size=number_sample)
if len(yy) < 1000:
random_row = np.array(list(range(len(yy))))
else:
random_row = np.array(random.sample(yy, number_sample))
time_original = time_data[random_row, :]
Data_origin = Data_origin[random_row, :] # 扩充选择1000个样�?
else:
# random_row = np.array(random.sample(yy, number_sample))
random_row = np.random.choice(a=yy, size=number_sample)
time_original = time_data[random_row, :]
Data_origin = Data_origin[random_row, :] # 扩充选择random_row个样�?
Data = (Data_origin - model["Train_X_mean"]) / model["Train_X_std"]
featValue = np.array(model["featValue"])+0.00001 # 训练数据的特征�?
featVec = np.array(model["featVec"]) # 训练数据的特征向�?
numbel_variable = featValue.shape[0]
k = (model["K"]) # 主元个数
selectVec = featVec[:, 0:k] ####自己选择�?
featValue_sort = featValue # [index] # 排序后的特征�?
############----------*********-SPE-**************----------########################
# LockVariable="3,5"
C_ = np.eye(numbel_variable) - np.dot(selectVec, selectVec.T)
X_SPE = C_.T
D_SPE = C_
DIAG_SPE = np.eye(numbel_variable)
############----------*********-T2-**************----------########################
DIAG_T2 = np.linalg.pinv(np.diag(featValue_sort[:int(model["K"])]))
D_T2 = selectVec.copy()
X_T2 = np.dot(D_T2, np.linalg.cholesky(DIAG_T2)).T
############----------*********-综合指标-**************----------########################
II = featValue_sort.copy()
II[:int(model["K"])] = II[:int(model["K"])] * model["T2CUL_99"]
II[int(model["K"]):] = model["QCUL_99"]
DIAG_Fai = np.linalg.inv(np.diag(II))
D_Fai = featVec.copy()
X_Fai = np.dot(D_Fai, np.linalg.cholesky(DIAG_Fai)).T
# ************************调用LARS*******************************
t = 50000
original_line, recon_line = [], []
limit_line=float(Limit_value)
beta_path = []
SPE_list = []
FAI_list = []
Fault_noise = np.zeros([np.array(Data_origin).shape[0], np.array(Data_origin).shape[1]]) # 故障变化幅度
Fault_index = np.zeros([np.array(Data_origin).shape[0], np.array(Data_origin).shape[1]]) # 真实故障方向
paraState = np.zeros([np.array(Data_origin).shape[0], np.array(Data_origin).shape[1]]) # 故障重构的方�?
index_num, index_point = [], []
all_FDR = []
all_FAR = []
fault_free = []
fdr = {}
far = {}
beta_pro = []
fault_index_row = []
plots_matrix = [] # 贡献图法的矩�?
plots_index = [] # 贡献图法的index
findex = np.where(low_f != 0)[0] # 判断加在那个参数�?
for ii in range(Data.shape[0]):
Y = Data[ii, :] # 测试数据的每一�?
noise=[]
for j in range(number_fault_variable):
noise.append(np.random.uniform(low_f[findex[j]], high_f[findex[j]])) # 均匀分布
Fault_index[ii, findex] = 1
Fault_noise[ii, findex] = noise
if Test_type == "FAI":
original_line.append(Y @ D_Fai @ DIAG_Fai @ D_Fai.T @ Y.T)
elif Test_type == "SPE":
original_line.append(Y @ X_SPE.T @ X_SPE @ Y.T)
Y = Y + Fault_noise[ii, :]
#########*********************计算SPE******************************
SPE_line = np.dot(Y, C_).dot(Y.T)
SPE_list.append(SPE_line)
#########################计算综合指标##########
FAI_list.append(np.dot(Y.T, D_Fai).dot(DIAG_Fai).dot(D_Fai.T).dot(Y))
# **************计算LARS***************
if Test_type=="FAI":
beta, mse = Lars(X_Fai, Y, D_Fai, DIAG_Fai, t, limit_line, LockVariable)
elif Test_type=="SPE":
beta, mse = Lars(X_SPE, Y, D_SPE, DIAG_SPE, t, limit_line, LockVariable)
else:
beta, mse = Lars(X_T2, Y, D_T2, DIAG_T2, t, limit_line, LockVariable)
beta_end = abs(beta[-1, :])
pi = len(beta_end)
if pi > 7:
jenk = jenkspy.jenks_breaks(beta_end, 5)
else:
jenk = jenkspy.jenks_breaks(beta_end, 2)
limt = (jenk[1] + jenk[2]) / 2
index = np.where(beta_end > 0)[0]
fault_index_row.append(ii)
if len(index) > para_length:
if Test_type == "SPE" and np.linalg.det(X_SPE @ X_SPE.T) > 1e-15:
res = recon_fault_diagnosis_r(Y, X_SPE @ X_SPE.T, limit_line, list(zip(index, beta_end[index])), model, True,
X_SPE @ X_SPE.T, LockVariable, selectVec, None)
if not isinstance(res[0], list):
if res[1] == "plot":
# beta[-1, :] = res[0]
plots_matrix.append(res[0])
plots_index.append(ii)
else:
beta[-1, :], index = res[0].T, res[1]
elif Test_type == "FAI" and np.linalg.det( D_Fai @ DIAG_Fai @ D_Fai.T) > 1e-15:
res = recon_fault_diagnosis_r_c(Y, D_Fai @ DIAG_Fai @ D_Fai.T, limit_line,
list(zip(index, beta_end[index])), model,
True, X_SPE @ X_SPE.T, LockVariable, selectVec, rbc=None)
if not isinstance(res[0], list):
if res[1] == "plot":
# beta[-1, :] = res[0]
plots_matrix.append(res[0])
plots_index.append(ii)
else:
beta[-1, :], index = res[0].T, res[1]
elif len(index) <= para_length and len(index) != 0:
if Test_type == "SPE":
res = recon_fault_diagnosis_r_l(Y, X_SPE @ X_SPE.T, index)
beta[-1, :], index = res[0].T, res[1]
elif Test_type == "FAI":
res = recon_fault_diagnosis_r_l(Y, D_Fai @ DIAG_Fai @ D_Fai.T, index)
beta[-1, :], index = res[0].T, res[1]
paraState[ii, index] = 1
beta_new = beta[-1, :] * paraState[ii, :]
beta_path.append(beta_new)
############################################################
if index.shape[0]==0:
fault_free.append(ii)
##### 统计fault detection rate (FDR)
d = [x for x in index if x in findex]
all_FDR.append(len(d) / (len(findex)+0.000001))
##计算单个参数的fdr
for i in findex:
if i not in fdr:
if i in index:
fdr.update({i: [1]})
else:
fdr.update({i: [0]})
else:
if i in index:
fdr[i].append(1)
else:
fdr[i].append(0)
###### 统计false alarm rate(FAR)
a = [x for x in index if x not in d]
all_FAR.append(len(a) / ((numbel_variable - len(findex))+0.0001))
###统计单个参数的far
beta_path = np.array(beta_path)
FDR = sum(all_FDR) / Data.shape[0]#诊出�?
FAR = sum(all_FAR) / Data.shape[0]#误诊�?
fdr_variable = []#单参数诊出率
far_variable = []#单参数误诊率
for variable in range(numbel_variable):
if variable not in fdr:
fdr_variable.append(0)
far_variable.append(0)
else:
fdr_variable.append(sum(fdr[variable]) / len(fdr[variable]))
far_variable.append(FAR)
fault_data=Data+Fault_noise
fault_data = fault_data[fault_index_row, :]
Fault_data=np.add(np.multiply(fault_data, model["Train_X_std"]), model["Train_X_mean"])
finalData = fault_data - beta_path
reconData = np.add(np.multiply(finalData, model["Train_X_std"]), model["Train_X_mean"]) # 重构�?
if len(plots_matrix) != 0:
reconData[plots_index] = plots_matrix
Data_origin = Data_origin[fault_index_row, :]
real_error_data = Data_origin - reconData
errorData = sum(abs(Data_origin - reconData)) # 偏差�?
Reconstruction_precision=errorData/Data.shape[0]
test_data = np.around(Data_origin.T, decimals=3).tolist()
error_data = np.around(real_error_data.T, decimals=3).tolist()
time_stamp = time_original.T.tolist()[0]
max_recon = list(zip(np.max(beta_path, axis=0).tolist(), np.min(beta_path, axis=0).tolist()))
train_mean = model["Train_X_mean"]
train_std = model["Train_X_std"]
items = [
('fdr_variable', np.around(fdr_variable, decimals=3).tolist()),
('far_variable', np.around(far_variable, decimals=3).tolist()),
('Reconstruction_precision', np.around(Reconstruction_precision, decimals=3).tolist()),
("error_data", error_data),
("test_data", test_data),
("original_line", original_line),
("time_stamp", time_stamp),
("max_recon", max_recon),
("mean", train_mean),
("std", train_std)
]
result = dict(items)
return result
def isnumber(limits):
flag = True
for item in limits:
item = item.replace("-","")
if not item.isdigit():
flag = False
break
return flag
def cleanmain(info):
try:
ItemsInfo, SamplingTimePeriods = [], []
model_id = info["Model_id"]
version = info["version"]
if version == "v-test":
res = PCA_Test.get_model_by_ID(model_id)
else:
res = PCA_Test.get_model_by_id_and_version(model_id, version)
lock = []
point_info = res["pointInfo"]
interval = info["Test_Data"]["interval"]
for i in range(len(point_info)):
try:
if point_info[i]["lock"]:
lock.append(i)
except:
continue
Test_Data = info["Test_Data"]
points = Test_Data["points"].split(",")
try:
times = Test_Data["time"].split(";")
for i in range(len(times)):
Eachsampletime = {}
timess = times[i].split(',')
Eachsampletime["StartingTime"] = timess[0]
Eachsampletime["TerminalTime"] = timess[1]
SamplingTimePeriods.append(Eachsampletime)
except KeyError:
SamplingTimePeriods = [{"StartingTime": item["st"],
"TerminalTime": item["et"]} for item in res["trainTime"]]
model = res["para"]["Model_info"]
condition = info["condition"].replace("=", "==").replace(">=", ">").replace("<=", "<")
dead = info["dead"].split(',')
limit = info["limit"].split(',')
uplower = info["uplow"].split(';')
count = 0
Constraint = ""
for i in range(len(points)):
iteminfo = {}
iteminfo["ItemName"] = points[i] # 加点
if dead[i] == "1": # 判断是否参与死区清洗
iteminfo["ClearDeadZone"] = "true"
else:
iteminfo["ClearDeadZone"] = "false"
if limit[i] == "1": # 参与上下限清�?
limits = uplower[i].split(',')
if isnumber(limits): # 输入上下限正�?
count += 1
Constraint += "[" + points[i] + "]>" + limits[0] + " and " + "[" + points[i] + "]<" + limits[
1] + " and "
ItemsInfo.append(iteminfo)
if count != 0:
Constraint = Constraint[:len(Constraint) - 4:]
else:
Constraint = "1==1" # 没有上下限清�?
Constraint += " and (" + condition + ")"
Constraint = Constraint.replace("\n", " ")
url = f"http://{config._CLEAN_IP}/exawebapi/exatime/GetCleaningData?ItemsInfo=%s&SamplingTimePeriods=%s&Constraint=%s&SamplingPeriod=%s&DCount=6" % (
ItemsInfo, SamplingTimePeriods, Constraint, interval)
response = requests.get(url)
content = json.loads(response.text)
data = np.array([item for item in content["ClearData"]]).T # test data
time_data = np.array([item for item in content["TimeData"]]).reshape(data.shape[0], -1)
try:
data = np.array(info["data"])
except KeyError:
pass
number_sample = info["number_sample"]
number_fault_variable = info["number_fault_variable"]
low_f = [float(item) for item in info["low_f"].split(',')]
high_f = [float(item) for item in info["high_f"].split(',')]
test_type = info["Test_Type"]
limit_value = info["Limit_Value"]
try:
expand = info["expand"]
# sort_data = info["sort_data"]
except KeyError:
expand = True
# sort_data = True
spe_recon = False
result = pca(model, lock, data, number_sample, number_fault_variable, low_f, high_f, test_type,
limit_value,time_data,spe_recon, expand)
try:
index = Test_Data["time"].index(",")
result["time"] = Test_Data["time"][:index:]
except KeyError:
result["time"] = sorted(res["trainTime"], key=lambda item: item["st"])[0]["st"]
result["BeforeCleanSamNum"] = content["BeforeCleanSamNum"]
result["AfterCleanSamNum"] = content["AfterCleanSamNum"]
result["amplitude"] = high_f
result["CleanOrNot"] = True
return result
except Exception as e:
msg = traceback.format_exc()
result = [{"CleanOrNot": False, 'msg': msg}]
return result
def main(info):
try:
ItemsInfo, SamplingTimePeriods = [], []
model_id = info["Model_id"]
version = info["version"]
if version == "v-test":
res = PCA_Test.get_model_by_ID(model_id)
else:
res = PCA_Test.get_model_by_id_and_version(model_id, version)
lock = []
point_info = res["pointInfo"]
interval = info["Test_Data"]["interval"]
for i in range(len(point_info)):
try:
if point_info[i]["lock"]:
lock.append(i)
except:
continue
Test_Data = info["Test_Data"]
points = Test_Data["points"].split(",")
try:
times = Test_Data["time"].split(";")
for i in range(len(times)):
Eachsampletime = {}
timess = times[i].split(',')
Eachsampletime["StartingTime"] = timess[0]
Eachsampletime["TerminalTime"] = timess[1]
SamplingTimePeriods.append(Eachsampletime)
except KeyError:
SamplingTimePeriods = [{"StartingTime": item["st"],
"TerminalTime": item["et"]} for item in res["trainTime"]]
model = res["para"]["Model_info"]
condition = info["condition"].replace("=", "==").replace(">=", ">").replace("<=", "<")
dead = info["dead"].split(',')
limit = info["limit"].split(',')
uplower = info["uplow"].split(';')
count = 0
Constraint = ""
for i in range(len(points)):
iteminfo = {}
iteminfo["ItemName"] = points[i] # 加点
if dead[i] == "1": # 判断是否参与死区清洗
iteminfo["ClearDeadZone"] = "true"
else:
iteminfo["ClearDeadZone"] = "false"
if limit[i] == "1": # 参与上下限清�?
limits = uplower[i].split(',')
if isnumber(limits): # 输入上下限正�?
count += 1
Constraint += "[" + points[i] + "]>" + limits[0] + " and " + "[" + points[i] + "]<" + limits[
1] + " and "
ItemsInfo.append(iteminfo)
if count != 0:
Constraint = Constraint[:len(Constraint) - 4:]
else:
Constraint = "1==1" # 没有上下限清�?
Constraint += " and (" + condition + ")"
Constraint = Constraint.replace("\n", " ")
url = f"http://{config._CLEAN_IP}/exawebapi/exatime/GetCleaningData?ItemsInfo=%s&SamplingTimePeriods=%s&Constraint=%s&SamplingPeriod=%s&DCount=6" % (
ItemsInfo, SamplingTimePeriods, Constraint, interval)
response = requests.get(url)
content = json.loads(response.text)
data = np.array([item for item in content["ClearData"]]).T # test data
time_data = np.array([item for item in content["TimeData"]]).reshape(data.shape[0], -1)
test_type = info["Test_Type"]
limit_value = info["Limit_Value"]
number_sample = info["number_sample"]
try:
expand = info["expand"]
# sort_data = info["sort_data"]
except KeyError:
expand = True
# sort_data = True
spe_recon = False
far, fdr, amplitude = [], [], []
recon = []
result = {}
for _, ass in enumerate(info["assess"]):
number_fault_variable = ass["number_fault_variable"]
low_f = [float(item) for item in ass["low_f"].split(',')]
high_f = [float(item) for item in ass["high_f"].split(',')]
result_temp = pca(model, lock, data, number_sample, number_fault_variable, low_f, high_f, test_type,
limit_value, time_data, spe_recon, expand)
t_index = np.nonzero(high_f)[0][0]#索引high_f中第一个非0数位置的行数;在矩阵的第3行,t_index == 2
far.append(result_temp["far_variable"][t_index])
fdr.append(result_temp["fdr_variable"][t_index])
recon.append(result_temp["Reconstruction_precision"][t_index])
amplitude.append(high_f[t_index])
try:
index = Test_Data["time"].index(",")
result["time"] = Test_Data["time"][:index:]
except KeyError:
result["time"] = sorted(res["trainTime"], key=lambda item: item["st"])[0]["st"]
result["BeforeCleanSamNum"] = content["BeforeCleanSamNum"]
result["AfterCleanSamNum"] = content["AfterCleanSamNum"]
result["amplitude"] = amplitude
result["CleanOrNot"] = True
result["far_variable"] = far
result["fdr_variable"] = fdr
result["Reconstruction_precision"] = recon
return result
except Exception as e:
msg = traceback.format_exc()
result = [{"CleanOrNot": False, 'msg': msg}]
return result #
if __name__ == "__main__":
info_str = {"expand":False,"Test_Data":{"time":"2021-07-14 15:41:04,2021-07-15 15:41:04","points":"DH4_40LCA30CT304,DH4_40LCA30CT303,DH4_40LCA10CT301,DH4_40LCJ50CT301,DH4_40LCA30CT305,DH4_40LCJ50AA101GT,DH4_40LCJ60CT301,DH4_40LCA70CT301,DH4_40LCA70CT302","interval":300000},"dead":"1,1,1,1,1,1,1,1,1","limit":"0,0,0,0,0,0,0,0,0","Model_id":"292","Model_alg":"PCA","number_sample":5000,"assess":[{"number_fault_variable":1,"low_f":"3.31,0,0,0,0,0,0,0,0","high_f":"3.31,0,0,0,0,0,0,0,0"},{"number_fault_variable":1,"low_f":"0,2.78,0,0,0,0,0,0,0","high_f":"0,2.78,0,0,0,0,0,0,0"},{"number_fault_variable":1,"low_f":"0,0,3.34,0,0,0,0,0,0","high_f":"0,0,3.34,0,0,0,0,0,0"},{"number_fault_variable":1,"low_f":"0,0,0,3.44,0,0,0,0,0","high_f":"0,0,0,3.44,0,0,0,0,0"},{"number_fault_variable":1,"low_f":"0,0,0,0,3.54,0,0,0,0","high_f":"0,0,0,0,3.54,0,0,0,0"},{"number_fault_variable":1,"low_f":"0,0,0,0,0,4.08,0,0,0","high_f":"0,0,0,0,0,4.08,0,0,0"},{"number_fault_variable":1,"low_f":"0,0,0,0,0,0,3.31,0,0","high_f":"0,0,0,0,0,0,3.31,0,0"},{"number_fault_variable":1,"low_f":"0,0,0,0,0,0,0,2.75,0","high_f":"0,0,0,0,0,0,0,2.75,0"},{"number_fault_variable":1,"low_f":"0,0,0,0,0,0,0,0,3.05","high_f":"0,0,0,0,0,0,0,0,3.05"}],"condition":"[DH4_40DEH-01FU101]>300 and [DH4_40DEH-01FU101]<600 ","Test_Type":"FAI","Limit_Value":1.359,"uplow":"null,null;null,null;null,null;null,null;null,null;null,null;null,null;null,null;null,null","number":0,"version":"v-2020-09-03 18:03:22"}
res = main(info_str)
print("aaa")