# -*- coding: utf-8 -*- """ @Time : 2020/6/14 16:42 @Author : 杰森·家乐森 @File : sae_diagnosis.py @Software: PyCharm """ import json import time import torch import itertools import numpy as np import pandas as pd import torch.nn as nn import torch.utils.data as Data from torch.autograd import Variable from sklearn.metrics import r2_score, mean_squared_error from sklearn.preprocessing import MinMaxScaler from matplotlib import pyplot as plt from warnings import warn from scipy.stats import f class Diagnosis(nn.Module): def __init__(self, model, dir): super(Diagnosis, self).__init__() self.model = model self.dir = torch.from_numpy(dir).float() self.tile = nn.Parameter(torch.ones(dir.shape[0], 1)) for p in self.parameters(): p.requires_grad = False self.amp = nn.Parameter(torch.zeros(*dir.shape)) def forward(self, x): input = self.tile.mm(x) + self.dir * self.amp output = predict(input, self.model) return input, output class RMSELoss(nn.Module): def __init__(self): super().__init__() self.mse = nn.MSELoss(reduce=False, size_average=False) def forward(self, y_true, y_pred): mse_loss = self.mse(y_true, y_pred) return torch.sqrt(mse_loss).data.numpy()[0][0] def mse_list(y_true, y_pre): return torch.sqrt(torch.mean(torch.pow(y_true - y_pre, 2), dim=1)) def predict(data, model): k = (len(model['weights']) // 2) - 1 output = [data] for i in range(len(model['weights'])): # if i == k: # output.append(output[i].mm(model['weights'][i]) + model["bias"][i]) # else: output.append(torch.sigmoid(output[i].mm(model['weights'][i]) + model["bias"][i])) return output[-1] def get_dir_array(data_dim, fault_num): """ 获取方向矩阵 :param data_dim: 样本维度 :param fault_num: 故障个数 :return: 方向矩阵 """ cols = np.array(list(itertools.combinations(range(data_dim), fault_num))) rows = np.array([[i] * fault_num for i in range(cols.shape[0])]) array = np.zeros([cols.shape[0], data_dim]) array[rows, cols] = 1 return array def bessel_correction(x, y): n1 = x.shape[0] - 1 if y is None: n2 = 0 else: n2 = y.shape[0] - 1 return n1, n2 def pooled_covariance_matrix(x, y, bessel=True): """ pooled covariance Compute the pooled covariance matrix Equation: The pooled covariance matrix is defined as: .. math:: S = \\frac{n_xS_x + n_yS_y}{n_x+n_y} And with bessel correction as: .. math:: S = \\frac{(n_x-1)S_x + (n_y-1)S_y}{n_x+n_y-2} Reference --------- see: https://en.wikipedia.org/wiki/Hotelling%27s_T-squared_distribution#Pooled_covariance_matrix :param x: array-like, samples of observations :param y: array-like, samples of observations :param bessel: bool, apply bessel correction (default) :return: float, the pooled variance """ _, *p = x.shape p = p[0] if p else 1 if bessel: n1, n2 = bessel_correction(x, y) else: n1 = x.shape[0] n2 = y.shape[0] try: s1 = n1 * x.cov() except AttributeError: s1 = n1 * np.cov(x, rowvar=False) try: s2 = n2 * y.cov() except AttributeError: s2 = n2 * np.cov(y, rowvar=False) s = (s1 + s2) / (n1 + n2) return s def inverse_covariance_matrix(x, y, bessel=True): """ :param x: array-like, samples of observations :param y: array-like, samples of observations :param bessel: bool, apply bessel correction (default) :return: float, the pooled variance inverse, the pooled variance """ _, *p = x.shape p = p[0] if p else 1 s = pooled_covariance_matrix(x, y, bessel) inv = np.linalg.solve(s, np.identity(p)) return inv, s def hotelling_t2(x, y=None, bessel=True, S=None): """ Compute the Hotelling (T2) test statistic. It is the multivariate extension of the Student's t-test. Test the null hypothesis that two multivariate samples have the same underlying probability distribution, when specifying samples for x and y. The number of samples do not have to be the same, but the number of features does have to be equal. Equation: Hotelling's t-squared statistic is defined as: .. math:: T^2 = n (\\bar{x} - {\mu})^{T} S^{-1} (\\bar{x} - {\mu}) Where S is the pooled covariance matrix and ᵀ represents the transpose. The two sample t-squared statistic is defined as: .. math:: T^2 = (\\bar{x} - \\bar{y})^{T} [S(\\frac1 n_x +\\frac 1 n_y)]^{-1} (\\bar{x}̄ - \\bar{y}) References: - Hotelling, Harold. (1931). The Generalization of Student's Ratio. Ann. Math. Statist. 2, no. 3, 360--378. doi:10.1214/aoms/1177732979. https://projecteuclid.org/euclid.aoms/1177732979 - Hotelling, Harold. (1955) Les Rapports entre les Methodes Statistiques recentes portant sur des Variables Multiples et l'Analyse Factorielle. 107-119. In: L'Analyse Factorielle et ses Applications. Centre National de la Recherche Scientifique, Paris. - Anderson T.W. (1992) Introduction to Hotelling (1931) The Generalization of Student’s Ratio. In: Kotz S., Johnson N.L. (eds) Breakthroughs in Statistics. Springer Series in Statistics (Perspectives in Statistics). Springer, New York, NY :param x: array-like, samples of observations for one or two sample test (required) :param y: for two sample test, array-like, samples of observations (optional), for one sample, list of means to test :param bessel: bool, apply bessel correction (default) :return: statistic: float, the t2 statistic f_value: float, the f value p_value: float, the p value s: 2d array, the pooled variance """ try: nx, p = x.shape except AttributeError as ex: if "list" in str(ex): x = np.asarray(x) nx, *p = x.shape p = p[0] if p else 1 y = np.asarray(y) else: warn(f"Error: The two samples must be in arrays or dataframes format.") raise ValueError # samples observed means x_bar = x.mean(0) one_sample = False if y is None: # One sample T-squared one_sample = True y = np.zeros(p) ny = None py = p diff_bar = x_bar - y else: ny, *py = y.shape if len(py) == 0: one_sample = True py = p diff_bar = x_bar - y else: # Two sample T-squared py = py[0] if py else 1 y_bar = y.mean(0) # difference of means diff_bar = x_bar - y_bar if p != py: warn( f"Error: the two samples must have the same number of features ({p} != {py})." ) raise ValueError # bessel correction ( -1 ) if bessel: n1, n2 = bessel_correction(x, y) else: n1 = nx n2 = ny if one_sample: n = nx else: n = n1 + n2 # calculate the T2 statistics # Technically, we use diff_bar.T for the transpose, but with Pandas, a 1 dimensional dataframe # is automatically aligned for @ and is not required if one_sample: if S is not None: cov = S else: try: cov = x.cov() except AttributeError: cov = np.cov(x, rowvar=False) inv_cov = np.linalg.pinv(cov) t2_stat = n * (diff_bar.T @ inv_cov @ diff_bar) if S is not None: return t2_stat # f statistic # TODO: use chi square instead of f statistic for large sample f_value = (n - p) * t2_stat / ((n - 1) * p) else: # pooled covariance inv_s, s = inverse_covariance_matrix(x, y, bessel) t2_stat = nx * ny / (nx + ny) * (diff_bar.T @ inv_s @ diff_bar) # f statistic # TODO: use chi square instead of f statistic for large sample f_value = (nx + ny - p - 1) * t2_stat / (n * p) # p-value p_value = f.sf(f_value, p, n - p) # survival function, 1 - cdf # return the list of results return t2_stat, f_value, p_value, cov if one_sample else s def rb_diagnosis(model, data, spe, dir_array, epochs=8000): """ 重构诊断 :param model: 建模模型 :param data: 故障数据 :param spe: spe限值 :param dir_array: 方向矩阵 :param epochs: 迭代次数 :return: 故障矩阵 """ data = np.atleast_2d(data) diagnosis = Diagnosis(model, dir_array) optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, diagnosis.parameters()), lr=0.4) loss_func = nn.MSELoss() data = torch.from_numpy(data).float() dataset = Data.TensorDataset(data, data) train_set = Data.DataLoader(dataset=dataset) # 训练模型 for epoch in range(epochs): for step, (x, y) in enumerate(train_set): b_x = Variable(x).float() b_y = Variable(x).float() b_label = Variable(y) compensate, decoded = diagnosis(b_x) if step == 0 and epoch % 100 == 0 and (mse_list(decoded, compensate) < spe).nonzero().shape[0] > 0: break loss = loss_func(decoded, compensate) optimizer.zero_grad() loss.backward() optimizer.step() else: continue break diagnosis.eval() com, pre = diagnosis(data) return dir_array[mse_list(com, pre) < spe], (com - data).data.numpy()[mse_list(com, pre) < spe] def diagnosis(model, data, spe): """ 诊断函数 :param model: 建模模型 :param data: 诊断数据 :param spe: spe限值 :return: 故障矩阵 """ for i in range(len(model['weights'])): model['weights'][i] = torch.tensor(model['weights'][i]) model['bias'][i] = torch.tensor(model['bias'][i]) test_data = torch.from_numpy(data).float() predict_data = predict(test_data, model) mse = mse_list(predict_data, test_data).data.numpy() dection_array = np.zeros(data.shape) amp_array = np.zeros(data.shape) # plt.hlines(spe, 0, 1000) # plt.plot(mse) # plt.show() it = iter(np.where(mse > spe)[0]) for i in it: time1 = time.time() for j in range(data.shape[1] - 1): dir = get_dir_array(data.shape[1], j + 1) fault_dir, com_amp = rb_diagnosis(model, data[i, :], spe, dir) if fault_dir.shape[0] > 0: dection_array[i, :] = fault_dir[0, :] amp_array[i, :] = com_amp[0, :] time2 = time.time() print("第%d轮诊断完成" % i) print("耗时%f" % (time2 - time1)) print(fault_dir[0, :]) print(com_amp[0, :]) break return dection_array, amp_array