#!/usr/bin/python3.6.5
# -*- coding: utf-8 -*-
# Time : 2020/3/29 23:03
# Author : HeKang
# Github : https://github.com/JustKeepSilence
"""PCA fault-injection assessment.

Injects synthetic faults into cleaned process data, runs LARS-based fault
reconstruction against a trained PCA model, and reports per-variable fault
detection rate (FDR), false alarm rate (FAR) and reconstruction precision.
Data is fetched from a cleaning web service (see ``config._CLEAN_IP``).
"""
import time
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.stats.distributions import chi2
import json
from json import JSONDecodeError
import sys
import traceback
import pymssql
import requests
import datetime
# NOTE(review): `norm` is imported twice and this `chi2` shadows
# scipy.stats.distributions.chi2 imported above — presumably harmless
# (same object / unused), but worth cleaning up; confirm before removing.
from scipy.stats import norm
from scipy.stats import f
from scipy.stats import chi2
import jenkspy
# import pca_train_k
import pcamtcltest_k
# import model_performance
import xlrd
import random
from pylab import mpl
import matplotlib.pyplot as plt
from collections import Counter
from operator import itemgetter
import PCA_Test
import config
from recon import Lars, recon_fault_diagnosis_r, recon_fault_diagnosis_r_l, recon_fault_diagnosis_r_c


def pca(model, LockVariable, Data_origin, number_sample, number_fault_variable, low_f, high_f, Test_type, Limit_value,
        time_data, spe_recon, expand=True, sort_data=False):
    """Inject faults into sampled test data and evaluate reconstruction.

    Parameters (types inferred from usage — confirm against callers):
        model: dict with trained-model fields ("Train_X_mean", "Train_X_std",
            "featValue", "featVec", "K", "T2CUL_99", "QCUL_99").
        LockVariable: indices of locked variables, forwarded to ``Lars``.
        Data_origin: 2-D array of raw (un-normalized) test samples, one row
            per sample.
        number_sample: how many rows to draw when sampling.
        number_fault_variable: how many variables receive injected noise.
        low_f / high_f: per-variable lower/upper fault amplitudes (raw units);
            nonzero entries mark the faulted variables.
        Test_type: "FAI", "SPE" or anything else (falls through to T2).
        Limit_value: control limit for the chosen statistic.
        time_data: timestamp column(s) aligned row-wise with ``Data_origin``.
        spe_recon: accepted but unused in this body.
        expand: if True, use all rows when fewer than 1000, otherwise sample
            without replacement; if False, sample with replacement.
        sort_data: accepted but unused in this body.

    Returns a dict with per-variable fdr/far, reconstruction precision,
    rounded test/error data, timestamps, and the model mean/std.
    """
    # Allowed reconstruction size depends on how many variables exist.
    if len(low_f) >= 12:
        para_length = 3
    elif 12 > len(low_f) >= 7:
        para_length = 2
    else:
        para_length = 1
    low_f = np.array(low_f) / np.array(model["Train_X_std"])   # normalize minimum fault amplitude
    high_f = np.array(high_f) / np.array(model["Train_X_std"])  # normalize maximum fault amplitude
    yy = list(range(0, Data_origin.shape[0]))
    time_sample = []
    if expand:
        # random_row = np.random.choice(a=yy, size=number_sample)
        if len(yy) < 1000:
            # Fewer than 1000 rows: keep every row in original order.
            random_row = np.array(list(range(len(yy))))
        else:
            # Sample without replacement.
            random_row = np.array(random.sample(yy, number_sample))
        time_original = time_data[random_row, :]
        Data_origin = Data_origin[random_row, :]  # expanded selection of samples
    else:
        # random_row = np.array(random.sample(yy, number_sample))
        # Sample WITH replacement (np.random.choice default).
        random_row = np.random.choice(a=yy, size=number_sample)
        time_original = time_data[random_row, :]
        Data_origin = Data_origin[random_row, :]  # selection of number_sample rows
    # Standardize with the training-set statistics.
    Data = (Data_origin - model["Train_X_mean"]) / model["Train_X_std"]
    featValue = np.array(model["featValue"]) + 0.00001  # training eigenvalues (+epsilon to avoid zeros)
    featVec = np.array(model["featVec"])  # training eigenvectors
    numbel_variable = featValue.shape[0]
    k = (model["K"])  # number of principal components
    selectVec = featVec[:, 0:k]  # retained loading vectors (first k eigenvectors)
    featValue_sort = featValue  # [index]  # eigenvalues; sorting disabled, kept in model order
    # ---------------- SPE statistic ----------------
    # LockVariable="3,5"
    C_ = np.eye(numbel_variable) - np.dot(selectVec, selectVec.T)  # residual-space projector
    X_SPE = C_.T
    D_SPE = C_
    DIAG_SPE = np.eye(numbel_variable)
    # ---------------- T2 statistic ----------------
    DIAG_T2 = np.linalg.pinv(np.diag(featValue_sort[:int(model["K"])]))
    D_T2 = selectVec.copy()
    X_T2 = np.dot(D_T2, np.linalg.cholesky(DIAG_T2)).T
    # ---------------- combined (FAI) statistic ----------------
    II = featValue_sort.copy()
    II[:int(model["K"])] = II[:int(model["K"])] * model["T2CUL_99"]
    II[int(model["K"]):] = model["QCUL_99"]
    DIAG_Fai = np.linalg.inv(np.diag(II))
    D_Fai = featVec.copy()
    X_Fai = np.dot(D_Fai, np.linalg.cholesky(DIAG_Fai)).T
    # ************************ LARS setup *******************************
    t = 50000  # LARS iteration/step budget — presumably; confirm in recon.Lars
    original_line, recon_line = [], []
    limit_line = float(Limit_value)
    beta_path = []
    SPE_list = []
    FAI_list = []
    Fault_noise = np.zeros([np.array(Data_origin).shape[0], np.array(Data_origin).shape[1]])  # injected fault magnitudes
    Fault_index = np.zeros([np.array(Data_origin).shape[0], np.array(Data_origin).shape[1]])  # true fault direction (ground truth)
    paraState = np.zeros([np.array(Data_origin).shape[0], np.array(Data_origin).shape[1]])    # diagnosed fault direction
    index_num, index_point = [], []
    all_FDR = []
    all_FAR = []
    fault_free = []
    fdr = {}
    far = {}  # NOTE(review): populated nowhere — per-variable FAR reuses the global FAR below
    beta_pro = []
    fault_index_row = []
    plots_matrix = []  # contribution-plot method: reconstructed rows
    plots_index = []   # contribution-plot method: which sample rows those replace
    findex = np.where(low_f != 0)[0]  # variables the fault is injected on
    for ii in range(Data.shape[0]):
        Y = Data[ii, :]  # one standardized test sample
        noise = []
        for j in range(number_fault_variable):
            noise.append(np.random.uniform(low_f[findex[j]], high_f[findex[j]]))  # uniform amplitude
        Fault_index[ii, findex] = 1
        Fault_noise[ii, findex] = noise
        # Statistic of the fault-free sample (baseline trace).
        if Test_type == "FAI":
            original_line.append(Y @ D_Fai @ DIAG_Fai @ D_Fai.T @ Y.T)
        elif Test_type == "SPE":
            original_line.append(Y @ X_SPE.T @ X_SPE @ Y.T)
        Y = Y + Fault_noise[ii, :]  # inject the fault
        # ---- SPE of the faulty sample ----
        SPE_line = np.dot(Y, C_).dot(Y.T)
        SPE_list.append(SPE_line)
        # ---- combined index of the faulty sample ----
        FAI_list.append(np.dot(Y.T, D_Fai).dot(DIAG_Fai).dot(D_Fai.T).dot(Y))
        # ---- LARS fault estimation for the chosen statistic ----
        if Test_type == "FAI":
            beta, mse = Lars(X_Fai, Y, D_Fai, DIAG_Fai, t, limit_line, LockVariable)
        elif Test_type == "SPE":
            beta, mse = Lars(X_SPE, Y, D_SPE, DIAG_SPE, t, limit_line, LockVariable)
        else:
            beta, mse = Lars(X_T2, Y, D_T2, DIAG_T2, t, limit_line, LockVariable)
        beta_end = abs(beta[-1, :])  # magnitudes from the final LARS step
        pi = len(beta_end)
        # Jenks natural breaks split the coefficient magnitudes into classes.
        if pi > 7:
            jenk = jenkspy.jenks_breaks(beta_end, 5)
        else:
            jenk = jenkspy.jenks_breaks(beta_end, 2)
        limt = (jenk[1] + jenk[2]) / 2  # NOTE(review): computed but unused below
        index = np.where(beta_end > 0)[0]  # variables LARS flagged
        fault_index_row.append(ii)
        if len(index) > para_length:
            # Too many candidates: refine via reconstruction-based diagnosis.
            # The determinant guard skips (near-)singular projection matrices.
            if Test_type == "SPE" and np.linalg.det(X_SPE @ X_SPE.T) > 1e-15:
                res = recon_fault_diagnosis_r(Y, X_SPE @ X_SPE.T, limit_line, list(zip(index, beta_end[index])),
                                              model, True, X_SPE @ X_SPE.T, LockVariable, selectVec, None)
                if not isinstance(res[0], list):
                    if res[1] == "plot":
                        # Contribution-plot fallback: store the row directly.
                        # beta[-1, :] = res[0]
                        plots_matrix.append(res[0])
                        plots_index.append(ii)
                    else:
                        beta[-1, :], index = res[0].T, res[1]
            elif Test_type == "FAI" and np.linalg.det(D_Fai @ DIAG_Fai @ D_Fai.T) > 1e-15:
                res = recon_fault_diagnosis_r_c(Y, D_Fai @ DIAG_Fai @ D_Fai.T, limit_line,
                                                list(zip(index, beta_end[index])), model, True,
                                                X_SPE @ X_SPE.T, LockVariable, selectVec, rbc=None)
                if not isinstance(res[0], list):
                    if res[1] == "plot":
                        # beta[-1, :] = res[0]
                        plots_matrix.append(res[0])
                        plots_index.append(ii)
                    else:
                        beta[-1, :], index = res[0].T, res[1]
        elif len(index) <= para_length and len(index) != 0:
            # Few candidates: direct least-squares style reconstruction.
            if Test_type == "SPE":
                res = recon_fault_diagnosis_r_l(Y, X_SPE @ X_SPE.T, index)
                beta[-1, :], index = res[0].T, res[1]
            elif Test_type == "FAI":
                res = recon_fault_diagnosis_r_l(Y, D_Fai @ DIAG_Fai @ D_Fai.T, index)
                beta[-1, :], index = res[0].T, res[1]
        paraState[ii, index] = 1  # mark diagnosed variables for this sample
        beta_new = beta[-1, :] * paraState[ii, :]  # keep only diagnosed coefficients
        beta_path.append(beta_new)
        if index.shape[0] == 0:
            fault_free.append(ii)
        # ---- fault detection rate (FDR) for this sample ----
        d = [x for x in index if x in findex]  # correctly detected variables
        all_FDR.append(len(d) / (len(findex) + 0.000001))  # epsilon guards findex being empty
        # per-variable FDR bookkeeping (1 = detected in this sample)
        for i in findex:
            if i not in fdr:
                if i in index:
                    fdr.update({i: [1]})
                else:
                    fdr.update({i: [0]})
            else:
                if i in index:
                    fdr[i].append(1)
                else:
                    fdr[i].append(0)
        # ---- false alarm rate (FAR) for this sample ----
        a = [x for x in index if x not in d]  # flagged but not actually faulty
        all_FAR.append(len(a) / ((numbel_variable - len(findex)) + 0.0001))
    beta_path = np.array(beta_path)
    FDR = sum(all_FDR) / Data.shape[0]  # overall detection rate
    FAR = sum(all_FAR) / Data.shape[0]  # overall false-alarm rate
    fdr_variable = []  # per-variable detection rate
    far_variable = []  # per-variable false-alarm rate
    for variable in range(numbel_variable):
        if variable not in fdr:
            fdr_variable.append(0)
            far_variable.append(0)
        else:
            fdr_variable.append(sum(fdr[variable]) / len(fdr[variable]))
            # NOTE(review): the per-variable FAR is just the global FAR — confirm intended.
            far_variable.append(FAR)
    fault_data = Data + Fault_noise
    fault_data = fault_data[fault_index_row, :]
    # De-standardize the faulty data back into engineering units.
    Fault_data = np.add(np.multiply(fault_data, model["Train_X_std"]), model["Train_X_mean"])
    finalData = fault_data - beta_path  # remove the estimated fault
    reconData = np.add(np.multiply(finalData, model["Train_X_std"]), model["Train_X_mean"])  # reconstructed values
    if len(plots_matrix) != 0:
        # Rows diagnosed via the contribution-plot path get their own reconstruction.
        reconData[plots_index] = plots_matrix
    Data_origin = Data_origin[fault_index_row, :]
    real_error_data = Data_origin - reconData
    errorData = sum(abs(Data_origin - reconData))  # per-variable absolute deviation totals
    Reconstruction_precision = errorData / Data.shape[0]
    test_data = np.around(Data_origin.T, decimals=3).tolist()
    error_data = np.around(real_error_data.T, decimals=3).tolist()
    time_stamp = time_original.T.tolist()[0]
    # Max/min reconstructed amplitude per variable, paired.
    max_recon = list(zip(np.max(beta_path, axis=0).tolist(), np.min(beta_path, axis=0).tolist()))
    train_mean = model["Train_X_mean"]
    train_std = model["Train_X_std"]
    items = [
        ('fdr_variable', np.around(fdr_variable, decimals=3).tolist()),
        ('far_variable', np.around(far_variable, decimals=3).tolist()),
        ('Reconstruction_precision', np.around(Reconstruction_precision, decimals=3).tolist()),
        ("error_data", error_data),
        ("test_data", test_data),
        ("original_line", original_line),
        ("time_stamp", time_stamp),
        ("max_recon", max_recon),
        ("mean", train_mean),
        ("std", train_std)
    ]
    result = dict(items)
    return result


def isnumber(limits):
    """Return True when every string in *limits* is an integer literal
    (an optional leading ``-`` anywhere in the string is ignored).

    NOTE(review): ``replace("-", "")`` strips ALL dashes and ``isdigit``
    rejects decimals, so "3.5" fails while "1-2" passes — confirm intended.
    """
    flag = True
    for item in limits:
        item = item.replace("-", "")
        if not item.isdigit():
            flag = False
            break
    return flag


def cleanmain(info):
    """Fetch cleaned data for one test request and run a single PCA assessment.

    *info* is a request dict (see the module-level ``info_str`` example for
    its schema). Returns the ``pca`` result dict augmented with cleaning
    metadata, or ``[{"CleanOrNot": False, "msg": traceback}]`` on any error.
    """
    try:
        ItemsInfo, SamplingTimePeriods = [], []
        model_id = info["Model_id"]
        version = info["version"]
        if version == "v-test":
            res = PCA_Test.get_model_by_ID(model_id)
        else:
            res = PCA_Test.get_model_by_id_and_version(model_id, version)
        lock = []
        point_info = res["pointInfo"]
        interval = info["Test_Data"]["interval"]
        # Collect indices of points flagged as locked; points without the
        # "lock" key are skipped (bare except preserved — reviewer note:
        # narrow to KeyError if possible).
        for i in range(len(point_info)):
            try:
                if point_info[i]["lock"]:
                    lock.append(i)
            except:
                continue
        Test_Data = info["Test_Data"]
        points = Test_Data["points"].split(",")
        try:
            # "st1,et1;st2,et2;..." -> list of start/end dicts.
            times = Test_Data["time"].split(";")
            for i in range(len(times)):
                Eachsampletime = {}
                timess = times[i].split(',')
                Eachsampletime["StartingTime"] = timess[0]
                Eachsampletime["TerminalTime"] = timess[1]
                SamplingTimePeriods.append(Eachsampletime)
        except KeyError:
            # No explicit time window: fall back to the model's training periods.
            SamplingTimePeriods = [{"StartingTime": item["st"], "TerminalTime": item["et"]}
                                   for item in res["trainTime"]]
        model = res["para"]["Model_info"]
        # NOTE(review): this replace chain converts "=" -> "==" but leaves
        # ">="/"<=" unchanged (the first replace turns ">=" into ">==",
        # which the second turns back into ">="), and mangles an existing
        # "==" into "====" — confirm the condition grammar before relying on it.
        condition = info["condition"].replace("=", "==").replace(">=", ">").replace("<=", "<")
        dead = info["dead"].split(',')
        limit = info["limit"].split(',')
        uplower = info["uplow"].split(';')
        count = 0
        Constraint = ""
        for i in range(len(points)):
            iteminfo = {}
            iteminfo["ItemName"] = points[i]  # add the point name
            if dead[i] == "1":  # does this point take part in dead-zone cleaning?
                iteminfo["ClearDeadZone"] = "true"
            else:
                iteminfo["ClearDeadZone"] = "false"
            if limit[i] == "1":  # takes part in upper/lower-limit cleaning
                limits = uplower[i].split(',')
                if isnumber(limits):  # limits are valid numbers
                    count += 1
                    Constraint += "[" + points[i] + "]>" + limits[0] + " and " + "[" + points[i] + "]<" + limits[
                        1] + " and "
            ItemsInfo.append(iteminfo)
        if count != 0:
            # Drop the dangling "and " (keeps one trailing space).
            Constraint = Constraint[:len(Constraint) - 4:]
        else:
            Constraint = "1==1"  # no limit cleaning requested
        Constraint += " and (" + condition + ")"
        Constraint = Constraint.replace("\n", " ")
        # NOTE(review): the %s placeholders interpolate Python reprs of the
        # list/dict arguments and nothing is URL-encoded — presumably the
        # cleaning service parses that format; confirm.
        url = f"http://{config._CLEAN_IP}/exawebapi/exatime/GetCleaningData?ItemsInfo=%s&SamplingTimePeriods=%s&Constraint=%s&SamplingPeriod=%s&DCount=6" % (
            ItemsInfo, SamplingTimePeriods, Constraint, interval)
        response = requests.get(url)
        content = json.loads(response.text)
        data = np.array([item for item in content["ClearData"]]).T  # test data, one row per sample
        time_data = np.array([item for item in content["TimeData"]]).reshape(data.shape[0], -1)
        try:
            # Caller-supplied data overrides the fetched data when present.
            data = np.array(info["data"])
        except KeyError:
            pass
        number_sample = info["number_sample"]
        number_fault_variable = info["number_fault_variable"]
        low_f = [float(item) for item in info["low_f"].split(',')]
        high_f = [float(item) for item in info["high_f"].split(',')]
        test_type = info["Test_Type"]
        limit_value = info["Limit_Value"]
        try:
            expand = info["expand"]
            # sort_data = info["sort_data"]
        except KeyError:
            expand = True
            # sort_data = True
        spe_recon = False
        result = pca(model, lock, data, number_sample, number_fault_variable, low_f, high_f, test_type,
                     limit_value, time_data, spe_recon, expand)
        try:
            # Report the first start time from the request window.
            # NOTE(review): a missing "," raises ValueError, which is NOT
            # caught here and falls through to the outer handler — confirm.
            index = Test_Data["time"].index(",")
            result["time"] = Test_Data["time"][:index:]
        except KeyError:
            result["time"] = sorted(res["trainTime"], key=lambda item: item["st"])[0]["st"]
        result["BeforeCleanSamNum"] = content["BeforeCleanSamNum"]
        result["AfterCleanSamNum"] = content["AfterCleanSamNum"]
        result["amplitude"] = high_f
        result["CleanOrNot"] = True
        return result
    except Exception as e:
        # Top-level boundary: report the traceback to the caller instead of raising.
        msg = traceback.format_exc()
        result = [{"CleanOrNot": False, 'msg': msg}]
        return result


def main(info):
    """Fetch cleaned data once, then run one PCA assessment per entry in
    ``info["assess"]`` and aggregate the per-fault-variable metrics.

    Returns a dict with parallel lists (far/fdr/reconstruction precision/
    amplitude, one element per assess entry) plus cleaning metadata, or
    ``[{"CleanOrNot": False, "msg": traceback}]`` on any error.
    """
    try:
        ItemsInfo, SamplingTimePeriods = [], []
        model_id = info["Model_id"]
        version = info["version"]
        if version == "v-test":
            res = PCA_Test.get_model_by_ID(model_id)
        else:
            res = PCA_Test.get_model_by_id_and_version(model_id, version)
        lock = []
        point_info = res["pointInfo"]
        interval = info["Test_Data"]["interval"]
        # Collect indices of locked points (same logic as cleanmain).
        for i in range(len(point_info)):
            try:
                if point_info[i]["lock"]:
                    lock.append(i)
            except:
                continue
        Test_Data = info["Test_Data"]
        points = Test_Data["points"].split(",")
        try:
            times = Test_Data["time"].split(";")
            for i in range(len(times)):
                Eachsampletime = {}
                timess = times[i].split(',')
                Eachsampletime["StartingTime"] = timess[0]
                Eachsampletime["TerminalTime"] = timess[1]
                SamplingTimePeriods.append(Eachsampletime)
        except KeyError:
            SamplingTimePeriods = [{"StartingTime": item["st"], "TerminalTime": item["et"]}
                                   for item in res["trainTime"]]
        model = res["para"]["Model_info"]
        # NOTE(review): same replace-chain caveat as in cleanmain — "=" becomes
        # "==", but ">="/"<=" are net unchanged and "==" becomes "====".
        condition = info["condition"].replace("=", "==").replace(">=", ">").replace("<=", "<")
        dead = info["dead"].split(',')
        limit = info["limit"].split(',')
        uplower = info["uplow"].split(';')
        count = 0
        Constraint = ""
        for i in range(len(points)):
            iteminfo = {}
            iteminfo["ItemName"] = points[i]  # add the point name
            if dead[i] == "1":  # does this point take part in dead-zone cleaning?
                iteminfo["ClearDeadZone"] = "true"
            else:
                iteminfo["ClearDeadZone"] = "false"
            if limit[i] == "1":  # takes part in upper/lower-limit cleaning
                limits = uplower[i].split(',')
                if isnumber(limits):  # limits are valid numbers
                    count += 1
                    Constraint += "[" + points[i] + "]>" + limits[0] + " and " + "[" + points[i] + "]<" + limits[
                        1] + " and "
            ItemsInfo.append(iteminfo)
        if count != 0:
            Constraint = Constraint[:len(Constraint) - 4:]
        else:
            Constraint = "1==1"  # no limit cleaning requested
        Constraint += " and (" + condition + ")"
        Constraint = Constraint.replace("\n", " ")
        url = f"http://{config._CLEAN_IP}/exawebapi/exatime/GetCleaningData?ItemsInfo=%s&SamplingTimePeriods=%s&Constraint=%s&SamplingPeriod=%s&DCount=6" % (
            ItemsInfo, SamplingTimePeriods, Constraint, interval)
        response = requests.get(url)
        content = json.loads(response.text)
        data = np.array([item for item in content["ClearData"]]).T  # test data
        time_data = np.array([item for item in content["TimeData"]]).reshape(data.shape[0], -1)
        test_type = info["Test_Type"]
        limit_value = info["Limit_Value"]
        number_sample = info["number_sample"]
        try:
            expand = info["expand"]
            # sort_data = info["sort_data"]
        except KeyError:
            expand = True
            # sort_data = True
        spe_recon = False
        far, fdr, amplitude = [], [], []
        recon = []
        result = {}
        # One PCA assessment per fault scenario; each scenario perturbs a
        # single variable (first nonzero entry of high_f).
        for _, ass in enumerate(info["assess"]):
            number_fault_variable = ass["number_fault_variable"]
            low_f = [float(item) for item in ass["low_f"].split(',')]
            high_f = [float(item) for item in ass["high_f"].split(',')]
            result_temp = pca(model, lock, data, number_sample, number_fault_variable, low_f, high_f,
                              test_type, limit_value, time_data, spe_recon, expand)
            t_index = np.nonzero(high_f)[0][0]  # position of the first nonzero amplitude in high_f
            far.append(result_temp["far_variable"][t_index])
            fdr.append(result_temp["fdr_variable"][t_index])
            recon.append(result_temp["Reconstruction_precision"][t_index])
            amplitude.append(high_f[t_index])
        try:
            # NOTE(review): ValueError from a missing "," escapes this
            # KeyError handler (same as in cleanmain).
            index = Test_Data["time"].index(",")
            result["time"] = Test_Data["time"][:index:]
        except KeyError:
            result["time"] = sorted(res["trainTime"], key=lambda item: item["st"])[0]["st"]
        result["BeforeCleanSamNum"] = content["BeforeCleanSamNum"]
        result["AfterCleanSamNum"] = content["AfterCleanSamNum"]
        result["amplitude"] = amplitude
        result["CleanOrNot"] = True
        result["far_variable"] = far
        result["fdr_variable"] = fdr
        result["Reconstruction_precision"] = recon
        return result
    except Exception as e:
        # Top-level boundary: report the traceback to the caller instead of raising.
        msg = traceback.format_exc()
        result = [{"CleanOrNot": False, 'msg': msg}]
        return result


# if __name__ == "__main__":
# NOTE(review): the __main__ guard is commented out, so this example request
# runs on every import of the module — confirm that is intended.
info_str = {"expand":False,"Test_Data":{"time":"2021-07-14 15:41:04,2021-07-15 15:41:04","points":"DH4_40LCA30CT304,DH4_40LCA30CT303,DH4_40LCA10CT301,DH4_40LCJ50CT301,DH4_40LCA30CT305,DH4_40LCJ50AA101GT,DH4_40LCJ60CT301,DH4_40LCA70CT301,DH4_40LCA70CT302","interval":300000},"dead":"1,1,1,1,1,1,1,1,1","limit":"0,0,0,0,0,0,0,0,0","Model_id":"292","Model_alg":"PCA","number_sample":5000,"assess":[{"number_fault_variable":1,"low_f":"3.31,0,0,0,0,0,0,0,0","high_f":"3.31,0,0,0,0,0,0,0,0"},{"number_fault_variable":1,"low_f":"0,2.78,0,0,0,0,0,0,0","high_f":"0,2.78,0,0,0,0,0,0,0"},{"number_fault_variable":1,"low_f":"0,0,3.34,0,0,0,0,0,0","high_f":"0,0,3.34,0,0,0,0,0,0"},{"number_fault_variable":1,"low_f":"0,0,0,3.44,0,0,0,0,0","high_f":"0,0,0,3.44,0,0,0,0,0"},{"number_fault_variable":1,"low_f":"0,0,0,0,3.54,0,0,0,0","high_f":"0,0,0,0,3.54,0,0,0,0"},{"number_fault_variable":1,"low_f":"0,0,0,0,0,4.08,0,0,0","high_f":"0,0,0,0,0,4.08,0,0,0"},{"number_fault_variable":1,"low_f":"0,0,0,0,0,0,3.31,0,0","high_f":"0,0,0,0,0,0,3.31,0,0"},{"number_fault_variable":1,"low_f":"0,0,0,0,0,0,0,2.75,0","high_f":"0,0,0,0,0,0,0,2.75,0"},{"number_fault_variable":1,"low_f":"0,0,0,0,0,0,0,0,3.05","high_f":"0,0,0,0,0,0,0,0,3.05"}],"condition":"[DH4_40DEH-01FU101]>300 and [DH4_40DEH-01FU101]<600 ","Test_Type":"FAI","Limit_Value":1.359,"uplow":"null,null;null,null;null,null;null,null;null,null;null,null;null,null;null,null;null,null","number":0,"version":"v-2020-09-03 18:03:22"}
res = main(info_str)
print("aaa")