# -*- coding: utf-8 -*-
import numpy as np
import traceback
import pandas as pd
from json import JSONDecodeError
from scipy.stats import norm
from scipy.stats import f
from scipy.stats.distributions import chi2
import json
import sys
import requests
import datetime
import jenkspy
import xlrd

from smote import smote
import config
from recon import sequence, sequence_d


def train(XMat, p):
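    """Train a PCA model for process monitoring.

    XMat : 2-D array-like, one sample per row, one measured variable per column.
    p    : fraction (0-1) of total variance the retained principal components
           must explain; determines the number of components k.

    Returns a JSON string holding the trained model: training statistics,
    Q/SPE, T2 and combined-index control limits, and reconstruction matrices.
    """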
    m = np.array(XMat).shape[1]  # number of variables, i.e. number of columns
    average = np.mean(XMat, axis=0)  # axis=0: column-wise mean
    std = np.std(XMat, axis=0)  # axis=0: column-wise standard deviation
    m, n = np.shape(XMat)  # number of rows m and number of columns n
    avgs = np.tile(average, (m, 1))  # repeat the mean row m times, one copy per sample
    stds = np.tile(std, (m, 1))  # repeat the std row m times, one copy per sample
    data_adjust = np.divide(XMat - avgs, stds)  # z-score: (value - column mean) / column std
    covX = np.cov(data_adjust.T)  # covariance matrix (symmetric)
    # corr = np.corrcoef(data_adjust.T)
    featValue, featVec = np.linalg.eig(covX)  # eigenvalues and eigenvectors of the covariance matrix
    # NOTE: a constant column has zero standard deviation, so the division above
    # fails; do not select identical/constant signals as training data.
    featValue = np.real(featValue)  # keep the real part of any complex result
    featVec = np.real(featVec)  # keep the real part of any complex result
    index = np.argsort(-featValue)  # sort eigenvalues descending (negate, then argsort ascending)
    featValue = featValue[index]  # rebuild in sorted order
    featVec = featVec[:, index]
    featValue_sum = np.divide(featValue, np.sum(featValue))  # each eigenvalue divided by the eigenvalue sum
    per = 0  # cumulative explained-variance ratio
    k = 0  # number of principal components
    for percent in featValue_sum:
        per += percent
        k = k + 1
        if per > p:
            break
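    # Example (values illustrative only): with explained-variance ratios
    # [0.63, 0.28, 0.07, 0.02] and p = 0.9, the loop stops at k = 2 because
    # 0.63 + 0.28 = 0.91 > 0.9.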
    # per and k now hold the cumulative ratio and component count at the cutoff p
    finalData = []
    if k > n:  # k must be smaller than the number of features
        print("k must be lower than the feature number")
        return
    else:
        # Eigenvectors are column vectors, while for a 2-D numpy array a[m][n],
        # a[1] is the first row, so the columns are sliced here:
        selectVec = np.matrix(featVec[:, :k])  # first k eigenvectors, converted from ndarray to matrix
        finalData = np.dot(data_adjust, selectVec).dot(selectVec.T)  # project onto the PCs and back: data_adjust @ selectVec @ selectVec.T
        reconData = np.add(np.multiply(finalData, stds), avgs)  # reconstruction: finalData * stds + avgs
        Train_X_min = np.min(XMat, axis=0)  # training minimum
        Train_X_max = np.max(XMat, axis=0)  # training maximum
        Train_X_mean = np.mean(XMat, axis=0)  # training mean
        Train_X_std = np.std(XMat, axis=0)  # training standard deviation
        Train_X_bais = XMat - reconData  # training residual
        Train_X_bais_max = np.max(np.abs(Train_X_bais), axis=0)  # largest absolute residual per column (axis=0)
        Train_X_bais_min = np.min(np.abs(Train_X_bais), axis=0)  # smallest absolute residual per column
        Train_X_bais_mean = np.mean(np.abs(Train_X_bais), axis=0)  # mean absolute residual per column
        Train_X_bais_std_upperB95 = np.array(np.abs(1.96 * np.std(Train_X_bais, axis=0) + Train_X_bais_mean))[
            0]  # 95% upper residual bound (mean + 1.96 * std)
        Train_X_bais_std_upperB99 = np.array(np.abs(2.58 * np.std(Train_X_bais, axis=0) + Train_X_bais_mean))[0]
        Train_X_bais_std_lowerB95 = np.array(np.abs(1.96 * np.std(Train_X_bais, axis=0) - Train_X_bais_mean))[
            0]  # 95% lower residual bound
        Train_X_bais_std_lowerB99 = np.array(np.abs(2.58 * np.std(Train_X_bais, axis=0) - Train_X_bais_mean))[0]
        QCUL_95_line = []  # per-variable limit lines
        QCUL_99_line = []
        for index1 in range(len(Train_X_bais_std_upperB95)):
            QCUL_95_line.append(max(Train_X_bais_std_upperB95[index1], Train_X_bais_std_lowerB95[index1]))
            QCUL_99_line.append(max(Train_X_bais_std_upperB99[index1], Train_X_bais_std_lowerB99[index1]))
        QCUL_95_line = np.array(QCUL_95_line)
        QCUL_99_line = np.array(QCUL_99_line)
        # ---------------- Q (SPE) control limit: QUCL ----------------
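        # Jackson-Mudholkar approximation for the Q-statistic limit, matching the
        # code below: with theta_i = sum of lambda_j**i over the discarded
        # eigenvalues (j > k) and h0 = 1 - 2*theta1*theta3 / (3*theta2**2),
        #
        #   Q_alpha = theta1 * ( c_alpha*h0*sqrt(2*theta2)/theta1
        #                        + 1 + theta2*h0*(h0 - 1)/theta1**2 )**(1/h0)
        #
        # where c_alpha is the standard-normal quantile (norm.ppf below).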
        theta1 = np.sum(featValue[k:])
        theta2 = np.sum(np.power(featValue[k:], 2))
        theta3 = np.sum(np.power(featValue[k:], 3))
        h0 = 1 - 2 * theta1 * theta3 / (3 * np.power(theta2, 2))
        ca_95 = norm.ppf(0.95, loc=0, scale=1)
        QCUL_95 = theta1 * np.power(
            h0 * ca_95 * np.sqrt(2 * theta2) / theta1 + 1 + theta2 * h0 * (h0 - 1) / np.power(theta1, 2),
            1 / h0)  # 95% confidence limit
        # QCUL_95_line = Train_X_bais_std*2.58  # +Train_X_mean  # de-normalized limit
        ca_99 = norm.ppf(0.99, loc=0, scale=1)
        QCUL_99 = theta1 * np.power(
            (h0 * ca_99 * np.sqrt(2 * theta2) / theta1 + 1 + theta2 * h0 * (h0 - 1) / np.power(theta1, 2)),
            1 / h0)  # 99% confidence limit
        # QCUL_99_line = Train_X_bais_std*1.96  # + Train_X_mean  # de-normalized limit

        # ---------------- T2 control limit: T2UCL ----------------
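        # Hotelling T2 limit with k components and m samples, as implemented below:
        #   T2_alpha = k*(m - 1)*(m + 1) * F_alpha(k, m - k) / (m*(m - k))
        # where F_alpha is the F-distribution quantile (f.ppf).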
        f_95 = f.ppf(0.95, k, m - k)
        T2CUL_95 = k * (m - 1) * (m + 1) * f_95 / (m * (m - k))  # 95% confidence limit
        T2CUL_95_line = np.sqrt(T2CUL_95) * Train_X_std / np.sqrt(m)  # +Train_X_mean  # de-normalized limit line
        f_99 = f.ppf(0.99, k, m - k)
        T2CUL_99 = k * (m - 1) * (m + 1) * f_99 / (m * (m - k))  # 99% confidence limit
        T2CUL_99_line = np.sqrt(T2CUL_99) * Train_X_std / np.sqrt(m)  # +Train_X_mean  # de-normalized limit line

        # ---------------- combined-index control limit ----------------
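        # Moment-matching (Yue-Qin style) limit for the combined index
        # phi = T2/T2_UCL + SPE/Q_UCL: the limit is g * chi2_alpha(h), where g
        # (gfi) and h (hfi) below are chosen so the approximation matches the
        # mean and variance of phi.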
        gfi_95 = (k / pow(T2CUL_95, 2) + theta2 / pow(QCUL_95, 2)) / (k / T2CUL_95 + theta1 / QCUL_95)
        hfi_95 = pow((k / T2CUL_95 + theta1 / QCUL_95), 2) / (k / pow(T2CUL_95, 2) + theta2 / pow(QCUL_95, 2))
        Kesi_95 = gfi_95 * chi2.ppf(0.95, hfi_95)  # chi-squared quantile
        Kesi_95_line = np.sqrt(Kesi_95) * Train_X_std / np.sqrt(m)  # de-normalized limit line

        gfi_99 = (k / pow(T2CUL_99, 2) + theta2 / pow(QCUL_99, 2)) / (k / T2CUL_99 + theta1 / QCUL_99)
        hfi_99 = pow((k / T2CUL_99 + theta1 / QCUL_99), 2) / (k / pow(T2CUL_99, 2) + theta2 / pow(QCUL_99, 2))
        Kesi_99 = gfi_99 * chi2.ppf(0.99, hfi_99)  # chi-squared quantile
        Kesi_99_line = np.sqrt(Kesi_99) * Train_X_std / np.sqrt(m)  # de-normalized limit line

        # Build the corresponding index (metric) matrices
        numbel_variable = featValue.shape[0]
        selectVec = featVec[:, 0:k]
        featValue_sort = featValue  # [index]  # eigenvalues, already sorted above
        C_ = np.eye(numbel_variable) - np.dot(selectVec, selectVec.T)
        X_SPE = C_.T
        # 99% limits
        II99 = featValue_sort.copy()
        II99[:k] = II99[:k] * T2CUL_99
        II99[k:] = QCUL_99
        # 95% limits
        II95 = featValue_sort.copy()
        II95[:k] = II95[:k] * T2CUL_95
        II95[k:] = QCUL_95
        DIAG_Fai99 = np.linalg.inv(np.diag(II99))
        DIAG_Fai95 = np.linalg.inv(np.diag(II95))
        D_Fai = featVec.copy()
        DIAG_T2 = np.linalg.inv(np.diag(featValue_sort[:k]))
        D_T2 = selectVec.copy()
        m_spe = X_SPE @ X_SPE.T
        m_fai_99 = D_Fai @ DIAG_Fai99 @ D_Fai.T
        m_fai_95 = D_Fai @ DIAG_Fai95 @ D_Fai.T
        m_T2 = D_T2 @ DIAG_T2 @ D_T2.T
        spe_recon = get_m(XMat.shape[1], m_spe)
        fai_99_recon = get_m(XMat.shape[1], m_fai_99)
        fai_95_recon = get_m(XMat.shape[1], m_fai_95)
        T2_recon = get_m(XMat.shape[1], m_T2)
        # cosine test value
        R = per  # correlation: the cumulative explained-variance ratio retained

        items = [('Train_X_min', np.around(Train_X_min, decimals=3).tolist()),
                 ('Train_X_max', np.around(Train_X_max, decimals=3).tolist()),
                 ('Train_X_std', np.around(Train_X_std, decimals=3).tolist()),
                 ('Train_X_mean', np.around(Train_X_mean, decimals=3).tolist()),
                 ('Train_X_bais_max', np.around(Train_X_bais_max, decimals=3).tolist()),
                 ('Train_X_bais_min', np.around(Train_X_bais_min, decimals=3).tolist()),
                 ('Train_X_bais_mean', np.around(Train_X_bais_mean, decimals=3).tolist()),
                 ('QCUL_95', np.around(QCUL_95, decimals=10).tolist()),
                 ('QCUL_99', np.around(QCUL_99, decimals=10).tolist()),
                 ('QCUL_95_line', np.around(QCUL_95_line, decimals=3).tolist()),
                 ('QCUL_99_line', np.around(QCUL_99_line, decimals=3).tolist()),
                 ('T2CUL_95', np.around(T2CUL_95, decimals=3).tolist()),
                 ('T2CUL_99', np.around(T2CUL_99, decimals=3).tolist()),
                 ('T2CUL_95_line', np.around(T2CUL_95_line, decimals=3).tolist()),
                 ('T2CUL_99_line', np.around(T2CUL_99_line, decimals=3).tolist()),
                 ('Kesi_95', np.around(Kesi_95, decimals=3).tolist()),
                 ('Kesi_99', np.around(Kesi_99, decimals=3).tolist()),
                 ('Kesi_95_line', np.around(Kesi_95_line, decimals=3).tolist()),
                 ('Kesi_99_line', np.around(Kesi_99_line, decimals=3).tolist()),
                 ('speRecon', np.around(spe_recon, decimals=3).tolist()),
                 ('T2Recon', np.around(T2_recon, decimals=3).tolist()),
                 ('Fai95Recon', np.around(fai_95_recon, decimals=3).tolist()),
                 ('Fai99Recon', np.around(fai_99_recon, decimals=3).tolist()),
                 ('mSPE', np.around(m_spe, decimals=3).tolist()),
                 ('mFai99', np.around(m_fai_99, decimals=3).tolist()),
                 ('mFai95', np.around(m_fai_95, decimals=3).tolist()),
                 ('mT2', np.around(m_T2, decimals=3).tolist()),
                 ('COV', np.around(covX, decimals=3).tolist()),
                 ('K', k),
                 ('R', np.around(R, decimals=3).tolist()),
                 ("featValue", np.around(featValue, decimals=3).tolist()),
                 ("featVec", np.around(featVec, decimals=3).tolist()),
                 ("selectVec", np.around(selectVec, decimals=3).tolist())]
        # model_info = json.dumps(dict(items))
        res_items = [('Model_info', dict(items)), ('Model_type', 'PCA')]
        result = dict(res_items)  # json.dumps(result)
        return json.dumps(result)


def get_m(dimension, m):
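    """Normalize each row of the metric matrix m by its diagonal element:
    ke_si_matrix[i][j] = m[i][j] / m[i][i]. Produces the *Recon reconstruction
    matrices returned by train()."""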
    ke_si_matrix = np.zeros(shape=(dimension, dimension))
    for i in range(dimension):
        for j in range(dimension):
            ke_si_matrix[i, j] = m[i, j] / m[i, i]
    return ke_si_matrix
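

def isnumber(limits):
    # clearmain() below calls isnumber(), which is not defined or imported in
    # this file; this is a minimal sketch of the assumed helper: it returns
    # True when every entry in `limits` parses as a number.
    try:
        for value in limits:
            float(value)
        return True
    except (TypeError, ValueError):
        return False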


def clearmain(info):
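    """Fetch cleaned historical data from the cleaning web service, optionally
    oversample it with SMOTE, then train the PCA model.

    `info` is the request payload: training points, time periods, sampling
    interval, dead-zone/limit cleaning flags, the hyper-parameter `percent`,
    and optional SMOTE settings. Note that the payload spells the condition
    key "conditon"; that spelling is preserved here.
    """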
    try:
        Train_Data = info["Train_Data"]
        # Turn a single "=" into "==" while leaving ">=" and "<=" intact
        # (the two trailing replaces undo the damage the first one does to them)
        condition = info["conditon"].replace("=", "==").replace(">=", ">").replace("<=", "<")
        times = Train_Data["time"].split(';')
        points = Train_Data["points"].split(',')
        interval = Train_Data["interval"]
        if interval == 10000:
            DCount = 60
        elif interval == 100000:
            DCount = 6
        elif interval == 300000:
            DCount = 5
        else:
            DCount = 4
        dead = Train_Data["dead"].split(',')
        limit = Train_Data["limit"].split(',')
        uplower = Train_Data["uplow"].split(';')
        percent = info["Hyper_para"]["percent"]
        count = 0
        ItemsInfo, SamplingTimePeriods = [], []
        Constraint = ""
        for i in range(len(points)):
            iteminfo = {}
            iteminfo["ItemName"] = points[i]  # add the point name
            if (dead[i] == "1"):  # does this point take part in dead-zone cleaning?
                iteminfo["ClearDeadZone"] = "true"
            else:
                iteminfo["ClearDeadZone"] = "false"
            if (limit[i] == "1"):  # takes part in upper/lower-limit cleaning
                limits = uplower[i].split(',')
                if (isnumber(limits) == True):  # limits entered correctly; isnumber checks they are numeric
                    count += 1
                    Constraint += "[" + points[i] + "]>" + limits[0] + " and " + "[" + points[i] + "]<" + limits[1] + " and "
            ItemsInfo.append(iteminfo)
        if (count != 0):
            Constraint = Constraint[:len(Constraint) - 4:]  # drop the trailing "and "
        else:
            Constraint = "1==1"  # no upper/lower-limit cleaning
        Constraint += " and (" + condition + ")"
        for i in range(len(times)):
            Eachsampletime = {}
            timess = times[i].split(',')
            Eachsampletime["StartingTime"] = timess[0]
            Eachsampletime["TerminalTime"] = timess[1]
            SamplingTimePeriods.append(Eachsampletime)
        Constraint = Constraint.replace("\n", " ")
        # ItemsInfo and SamplingTimePeriods are interpolated with their Python repr
        url = f"http://{config._CLEAN_IP}/exawebapi/exatime/GetCleaningData?ItemsInfo=%s&SamplingTimePeriods=%s&Constraint=%s&SamplingPeriod=%s&DCount=%d" % (
            ItemsInfo, SamplingTimePeriods, Constraint, interval, DCount)
        response = requests.get(url)
        content = json.loads(response.text)
        data = np.array([item for item in content["ClearData"]]).T
        try:
            smote_data = info["smote"]
            # smote_data = False
        except KeyError:
            smote_data = False
        if smote_data:
            try:
                smote_index = [points.index(item["pointId"]) for item in info["smote_config"] if item["LAY_CHECKED"]]
                smote_num = [int(item["number"]) for item in info["smote_config"] if item["LAY_CHECKED"]]
                max_value = [float(item["max"]) for item in info["smote_config"] if item["LAY_CHECKED"]]
                min_value = [float(item["min"]) for item in info["smote_config"] if item["LAY_CHECKED"]]
            except KeyError:
                pass
            else:
                if len(smote_num) != 0:
                    data, *_ = smote(data, smote_index, smote_num, max_value, min_value)
        result = train(data, percent)
        result = result.replace("NaN", "-1")  # downstream JSON consumers may not accept NaN literals
        result = json.loads(result)
        result["BeforeCleanSamNum"] = content["BeforeCleanSamNum"]
        result["AfterCleanSamNum"] = content["AfterCleanSamNum"]
        result["CleanOrNot"] = True
        return json.dumps(result)
    except Exception as e:
        result = [{"CleanOrNot": False, "msg": traceback.format_exc()}]
        return json.dumps(result, ensure_ascii=False)


def test_offline(model, LockVariable, Data_origin):
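    """Score offline samples against a trained model.

    model       : the "Model_info" dict produced by train() (parsed from JSON).
    LockVariable: accepted by the interface; not used in this function body.
    Data_origin : 2-D array of raw (un-normalized) samples to test.

    Returns a JSON string with reconstructed values, residuals, SPE and
    combined-index (FAI) sequences, and the fault-direction matrix paraState.
    """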
    test_data = (Data_origin - model["Train_X_mean"]) / model["Train_X_std"]
    SPE_list = []
    FAI_list = []
    m_fai = np.array(model["mFai99"])
    m_spe = np.array(model["mSPE"])
    ke_si_matrix = np.array(model["Fai99Recon"])
    line = model["Kesi_99"]
    paraState = np.zeros(Data_origin.shape)  # fault-direction matrix
    f_matrix = np.zeros(Data_origin.shape)  # fault-amplitude matrix
    for i in range(test_data.shape[0]):
        data = test_data[i, :]
        SPE_list.append(float(data @ m_spe @ data.T))
        FAI_list.append(float(data @ m_fai @ data.T))
        if data @ m_fai @ data.T > line:
            # compute the fault direction and amplitude; m_fai is assumed to be
            # the intended metric matrix for sequence() here
            t_c, f_amp = sequence(data, line, m_fai, ke_si_matrix)
            paraState[i] = t_c
            f_matrix[i] = f_amp
    final_data = test_data - f_matrix
    recon_data = np.add(np.multiply(final_data, model["Train_X_std"]), model["Train_X_mean"])  # reconstructed values
    error_data = Data_origin - recon_data
    items = {"reconData": recon_data.tolist(), "errorData": error_data.tolist(), "R": 0, "SPE": SPE_list, "FAI": FAI_list,
             "paraState": paraState.tolist()}
    return json.dumps(items)
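

if __name__ == "__main__":
    # Minimal smoke test for train() on synthetic data (illustrative only; it
    # assumes the smote/config/recon imports above resolve in this environment
    # and touches no external service).
    rng = np.random.default_rng(0)
    demo = rng.normal(size=(200, 4)) @ np.diag([3.0, 2.0, 1.0, 0.5])
    model = json.loads(train(demo, 0.9))["Model_info"]
    print("components retained:", model["K"])
    print("explained variance:", model["R"])
    print("Q 99% limit:", model["QCUL_99"])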