# -*- coding: utf-8 -*-
"""
 @Time    : 2020/6/14 16:42
 @Author  : 杰森·家乐森
 @File    : sae_diagnosis.py
 @Software: PyCharm
"""
import json
import time
import torch
import itertools
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.utils.data as Data
from torch.autograd import Variable
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from matplotlib import pyplot as plt
from warnings import warn
from scipy.stats import f


class Diagnosis(nn.Module):
    """Module that shifts one sample along candidate directions and reconstructs it.

    Each row of ``dir`` is one candidate direction. ``amp`` (same shape as
    ``dir``) is the only parameter left trainable; ``tile`` is a frozen column
    of ones used to repeat the input row once per candidate direction.
    """

    def __init__(self, model, dir):
        """
        :param model: object handed unchanged to ``predict`` as its model argument
        :param dir: numpy array of candidate directions, one per row
        """
        super().__init__()
        self.model = model
        self.dir = torch.from_numpy(dir).float()
        n_candidates = dir.shape[0]
        self.tile = nn.Parameter(torch.ones(n_candidates, 1))
        # Freeze everything registered so far. ``amp`` is created afterwards,
        # so it is the only parameter that keeps requires_grad=True.
        for frozen in self.parameters():
            frozen.requires_grad = False
        self.amp = nn.Parameter(torch.zeros(*dir.shape))

    def forward(self, x):
        """
        :param x: tensor that ``tile`` (n_candidates x 1) can be matrix-multiplied with
        :return: tuple (compensated input, output of ``predict`` on that input)
        """
        compensated = self.tile.mm(x) + self.dir * self.amp
        reconstructed = predict(compensated, self.model)
        return compensated, reconstructed


class RMSELoss(nn.Module):
    """Square root of the element-wise squared error, reported for entry [0][0].

    NOTE(review): despite the name, ``forward`` does not average over the
    inputs. It returns only the ``[0][0]`` entry of the element-wise result,
    i.e. the absolute difference of the first elements. That behaviour is kept
    unchanged for existing callers; confirm whether a true RMSE was intended.
    """

    def __init__(self):
        super().__init__()
        # ``reduction='none'`` is the supported spelling of the deprecated
        # ``reduce=False, size_average=False`` pair, which warns when used.
        self.mse = nn.MSELoss(reduction='none')

    def forward(self, y_true, y_pred):
        """
        :param y_true: tensor with at least two dimensions
        :param y_pred: tensor of the same shape as ``y_true``
        :return: numpy scalar, sqrt of the squared error at position [0][0]
        """
        squared_error = self.mse(y_true, y_pred)
        return torch.sqrt(squared_error).detach().numpy()[0][0]


def mse_list(y_true, y_pre):
    """Return, per row, the square root of the mean squared difference along dim 1."""
    squared_error = (y_true - y_pre) ** 2
    return squared_error.mean(dim=1).sqrt()


def predict(data, model):
    """Run ``data`` through every layer of ``model`` with a sigmoid after each one.

    :param data: 2-D tensor fed to the first layer
    :param model: mapping with ``'weights'`` and ``'bias'`` sequences of tensors;
        layer ``i`` computes ``sigmoid(h.mm(weights[i]) + bias[i])``
    :return: output of the last layer (``data`` itself if there are no layers)
    """
    hidden = data
    # Only the running activation is kept; earlier layer outputs are not needed.
    for i, weight in enumerate(model['weights']):
        hidden = torch.sigmoid(hidden.mm(weight) + model["bias"][i])
    return hidden


def get_dir_array(data_dim, fault_num):
    """
    Build the direction matrix.

    Every row marks one combination of ``fault_num`` variables out of
    ``data_dim`` with ones; all other entries are zero. Rows follow the order
    produced by ``itertools.combinations``.

    :param data_dim: sample dimension
    :param fault_num: number of faults
    :return: direction matrix, float array with one row per combination and
        ``data_dim`` columns (a single all-zero row when ``fault_num`` is 0,
        no rows when ``fault_num`` exceeds ``data_dim``)
    """
    combos = list(itertools.combinations(range(data_dim), fault_num))
    directions = np.zeros([len(combos), data_dim])
    # With no combinations or empty combinations there is nothing to mark.
    # Skipping the fancy indexing avoids float-typed empty index arrays.
    if combos and fault_num > 0:
        cols = np.array(combos, dtype=int)
        rows = np.arange(len(combos))[:, None]  # broadcasts against cols
        directions[rows, cols] = 1
    return directions


def bessel_correction(x, y):
    """Return the row counts of ``x`` and ``y``, each reduced by one.

    :param x: object exposing ``shape``; ``shape[0]`` is taken as its sample count
    :param y: same kind of object, or ``None``
    :return: tuple ``(n1, n2)``; ``n2`` is 0 when ``y`` is ``None``
    """
    n1 = x.shape[0] - 1
    n2 = 0 if y is None else y.shape[0] - 1
    return n1, n2


def pooled_covariance_matrix(x, y, bessel=True):
    r""" pooled covariance
    Compute the pooled covariance matrix
    Equation:
    The pooled covariance matrix is defined as:
    .. math::
        S =  \frac{n_xS_x + n_yS_y}{n_x+n_y}
    And with bessel correction as:
    .. math::
        S =  \frac{(n_x-1)S_x + (n_y-1)S_y}{n_x+n_y-2}

    ``bessel`` only changes the weights of the two terms; the per-sample
    covariances are obtained with the default arguments of ``cov()`` /
    ``np.cov`` in both cases.

    Reference
    ---------
    see: https://en.wikipedia.org/wiki/Hotelling%27s_T-squared_distribution#Pooled_covariance_matrix
    :param x: array-like, samples of observations (rows are observations)
    :param y: array-like, samples of observations (rows are observations)
    :param bessel: bool, apply bessel correction (default)
    :return: the pooled covariance matrix (same container type that the
        weighted sum of the two covariance results produces)
    """

    def _covariance(sample):
        # Objects with their own ``cov()`` (e.g. DataFrames) use it;
        # everything else goes through numpy with one observation per row.
        try:
            return sample.cov()
        except AttributeError:
            return np.cov(sample, rowvar=False)

    if bessel:
        n1, n2 = bessel_correction(x, y)
    else:
        n1 = x.shape[0]
        n2 = y.shape[0]

    return (n1 * _covariance(x) + n2 * _covariance(y)) / (n1 + n2)


def inverse_covariance_matrix(x, y, bessel=True):
    """
    Invert the pooled covariance matrix of two samples.

    The inverse is obtained by solving ``S @ inv = I`` rather than by calling
    a dedicated inversion routine.

    :param x: array-like, samples of observations
    :param y: array-like, samples of observations
    :param bessel: bool, apply bessel correction (default)
    :return: tuple (inverse of the pooled covariance, the pooled covariance)
    """
    _n_samples, *trailing_dims = x.shape
    n_features = trailing_dims[0] if trailing_dims else 1
    pooled = pooled_covariance_matrix(x, y, bessel)
    identity = np.identity(n_features)
    return np.linalg.solve(pooled, identity), pooled


def hotelling_t2(x, y=None, bessel=True, S=None):
    r"""
    Compute the Hotelling (T2) test statistic.
    It is the multivariate extension of the Student's t-test.
    Test the null hypothesis that two multivariate samples have the same underlying
    probability distribution, when specifying samples for x and y. The number of samples do not have
    to be the same, but the number of features does have to be equal.
    Equation:
    Hotelling's t-squared statistic is defined as:
    .. math::
        T^2 = n (\bar{x} - \mu)^{T} S^{-1} (\bar{x} - \mu)
    Where S is the covariance matrix and T represents the transpose.
    The two sample t-squared statistic is defined as:
    .. math::
        T^2 = (\bar{x} - \bar{y})^{T} [S(\frac{1}{n_x} + \frac{1}{n_y})]^{-1} (\bar{x} - \bar{y})
    Where S is the pooled covariance matrix.
    References:
        - Hotelling, Harold. (1931). The Generalization of Student's Ratio. Ann. Math. Statist. 2, no. 3, 360--378.
          doi:10.1214/aoms/1177732979. https://projecteuclid.org/euclid.aoms/1177732979
        - Hotelling, Harold. (1955) Les Rapports entre les Methodes Statistiques recentes portant sur des Variables Multiples
          et l'Analyse Factorielle. 107-119.
          In: L'Analyse Factorielle et ses Applications. Centre National de la Recherche Scientifique, Paris.
        - Anderson T.W. (1992) Introduction to Hotelling (1931) The Generalization of Student's Ratio.
          In: Kotz S., Johnson N.L. (eds) Breakthroughs in Statistics.
          Springer Series in Statistics (Perspectives in Statistics). Springer, New York, NY
    :param x: array-like, samples of observations for one or two sample test (required)
    :param y: for two sample test, array-like, samples of observations (optional), for one sample, list of means to test
        (``None`` tests against a zero mean vector)
    :param bessel: bool, apply bessel correction (default); only affects the two sample test
    :param S: optional covariance matrix used instead of the covariance of ``x``
        in the one sample test; ignored by the two sample test
    :return:
        When ``S`` is given in the one sample test, only the t2 statistic is
        returned. Otherwise a tuple of:
        statistic: float,
            the t2 statistic
        f_value: float,
            the f value
        p_value: float,
            the p value
        s: 2d array,
            the covariance of ``x`` (one sample) or the pooled covariance (two sample)
    :raises ValueError: if ``x`` is neither a list nor an object with ``shape``,
        or if the two samples have a different number of features
    """
    try:
        nx, p = x.shape
    except AttributeError:
        if not isinstance(x, list):
            message = "Error: The two samples must be in arrays or dataframes format."
            warn(message)
            raise ValueError(message)
        x = np.asarray(x)
        nx, *p = x.shape
        p = p[0] if p else 1
        # Keep ``None`` as ``None``: converting it to an array would hide the
        # "no y given" case from the one sample check below.
        if y is not None:
            y = np.asarray(y)

    # A plain list of means is documented as valid input for the one sample test.
    if y is not None and not hasattr(y, "shape"):
        y = np.asarray(y)

    # samples observed means
    x_bar = x.mean(0)

    if y is None:
        # One sample T-squared against a zero mean vector
        one_sample = True
        ny = None
        py = p
        reference = np.zeros(p)
    else:
        ny, *py = y.shape
        if not py:
            # 1-d y: a vector of means to test against
            one_sample = True
            py = p
            reference = y
        else:
            # Two sample T-squared
            one_sample = False
            py = py[0]
            reference = None

    # Validate before subtracting, so a mismatch reports this message
    # instead of a broadcasting error.
    if p != py:
        message = f"Error: the two samples must have the same number of features ({p} != {py})."
        warn(message)
        raise ValueError(message)

    # difference of means
    diff_bar = x_bar - (reference if one_sample else y.mean(0))

    # calculate the T2 statistics
    # Technically, we use diff_bar.T for the transpose, but with Pandas, a 1 dimensional dataframe
    # is automatically aligned for @ and is not required
    if one_sample:
        n = nx
        if S is not None:
            cov = S
        else:
            try:
                cov = x.cov()
            except AttributeError:
                cov = np.cov(x, rowvar=False)
        inv_cov = np.linalg.pinv(cov)
        t2_stat = n * (diff_bar.T @ inv_cov @ diff_bar)
        if S is not None:
            # With a caller-supplied covariance only the statistic is reported.
            return t2_stat
        # f statistic
        # TODO: use chi square instead of f statistic for large sample
        f_value = (n - p) * t2_stat / ((n - 1) * p)
        df2 = n - p
        result_cov = cov
    else:
        if bessel:
            n1, n2 = bessel_correction(x, y)
        else:
            n1, n2 = nx, ny
        n = n1 + n2
        # pooled covariance
        inv_s, result_cov = inverse_covariance_matrix(x, y, bessel)
        t2_stat = nx * ny / (nx + ny) * (diff_bar.T @ inv_s @ diff_bar)
        # f statistic
        # TODO: use chi square instead of f statistic for large sample
        f_value = (nx + ny - p - 1) * t2_stat / (n * p)
        # The scaled two sample statistic follows F(p, nx + ny - p - 1); the
        # denominator degrees of freedom match the multiplier used just above.
        df2 = nx + ny - p - 1

    # p-value
    p_value = f.sf(f_value, p, df2)  # survival function, 1 - cdf

    # return the list of results
    return t2_stat, f_value, p_value, result_cov


def rb_diagnosis(model, data, spe, dir_array, epochs=8000):
    """
    Reconstruction-based diagnosis.

    Fits one compensation amplitude per entry of ``dir_array`` by gradient
    descent so that the compensated sample is reproduced by the model, then
    reports the directions whose reconstruction error falls below ``spe``.

    :param model: fitted model, passed through to ``Diagnosis``
    :param data: fault data (promoted to 2-D with ``np.atleast_2d``)
    :param spe: SPE limit
    :param dir_array: direction matrix, one candidate direction per row
    :param epochs: maximum number of training epochs
    :return: tuple (rows of ``dir_array`` accepted by the SPE limit,
        matching compensation offsets ``compensated - data`` as a numpy array)
    """
    data = torch.from_numpy(np.atleast_2d(data)).float()
    net = Diagnosis(model, dir_array)
    trainable = [p for p in net.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(trainable, lr=0.4)
    loss_func = nn.MSELoss()
    loader = Data.DataLoader(dataset=Data.TensorDataset(data, data))

    # Train the compensation amplitudes
    finished = False
    for epoch in range(epochs):
        for step, (x, _) in enumerate(loader):
            compensate, decoded = net(x.float())
            # Every 100 epochs, stop as soon as any direction meets the limit.
            if step == 0 and epoch % 100 == 0 and bool((mse_list(decoded, compensate) < spe).any()):
                finished = True
                break
            loss = loss_func(decoded, compensate)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if finished:
            break

    net.eval()
    # The final pass is evaluation only, so no autograd graph is needed.
    with torch.no_grad():
        com, pre = net(data)
        accepted = (mse_list(com, pre) < spe).numpy()
        offsets = (com - data).numpy()

    return dir_array[accepted], offsets[accepted]


def diagnosis(model, data, spe):
    """
    Diagnosis function.

    Samples whose reconstruction error exceeds ``spe`` are diagnosed by trying
    an increasing number of simultaneous faults (1 up to ``data.shape[1] - 1``)
    until ``rb_diagnosis`` accepts at least one direction. The first accepted
    direction and its compensation are recorded for that sample.

    The caller's ``model`` is not modified; its weights and biases are copied
    into float32 tensors for internal use.

    :param model: fitted model, mapping with ``'weights'`` and ``'bias'`` sequences
    :param data: data to diagnose, 2-D numpy array with one sample per row
    :param spe: SPE limit
    :return: tuple (fault direction matrix, compensation amplitude matrix),
        both with the shape of ``data``; rows of undiagnosed samples stay zero
    """
    # Work on a tensor copy so the caller's model keeps its original contents.
    tensor_model = {
        'weights': [torch.as_tensor(w, dtype=torch.float32) for w in model['weights']],
        'bias': [torch.as_tensor(b, dtype=torch.float32) for b in model['bias']],
    }
    test_data = torch.from_numpy(data).float()
    predict_data = predict(test_data, tensor_model)
    mse = mse_list(predict_data, test_data).detach().numpy()

    n_features = data.shape[1]
    detection_array = np.zeros(data.shape)
    amp_array = np.zeros(data.shape)

    for i in np.where(mse > spe)[0]:
        time1 = time.time()
        for fault_num in range(1, n_features):
            directions = get_dir_array(n_features, fault_num)
            fault_dir, com_amp = rb_diagnosis(tensor_model, data[i, :], spe, directions)
            if fault_dir.shape[0] > 0:
                detection_array[i, :] = fault_dir[0, :]
                amp_array[i, :] = com_amp[0, :]
                time2 = time.time()
                print("第%d轮诊断完成" % i)
                print("耗时%f" % (time2 - time1))
                print(fault_dir[0, :])
                print(com_amp[0, :])
                break
    return detection_array, amp_array