You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
286 lines
11 KiB
286 lines
11 KiB
2 weeks ago
|
# -*- coding: utf-8 -*-
|
||
|
"""
|
||
|
Created on Sun Feb 28 10:04:26 2016
|
||
|
PCA source code————最新更新————————————————————————————
|
||
|
@author:
|
||
|
"""
|
||
|
|
||
|
import time
|
||
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
from scipy.stats import norm
|
||
|
from scipy.stats.distributions import chi2
|
||
|
import json
|
||
|
import sys
|
||
|
import pymssql
|
||
|
import requests
|
||
|
import datetime
|
||
|
from scipy.stats import norm
|
||
|
from scipy.stats import f
|
||
|
from scipy.stats import chi2
|
||
|
import jenkspy
|
||
|
import xlrd
|
||
|
from recon import Lars, recon_fault_diagnosis_r, recon_fault_diagnosis_r_l, recon_fault_diagnosis_r_c
|
||
|
|
||
|
|
||
|
"""
|
||
|
参数:
|
||
|
- XMat:传入的是一个numpy的矩阵格式,行表示样本数,列表示特征
|
||
|
- k:表示取前k个特征值对应的特征向量
|
||
|
返回值:
|
||
|
- finalData:参数一指的是返回的低维矩阵,对应于输入参数二
|
||
|
- reconData:参数二对应的是移动坐标轴后的矩阵
|
||
|
"""
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
class MSSQL:
    """Thin wrapper around pymssql for one-shot SQL execution.

    A fresh connection is opened for every query and closed when the
    call completes, so instances hold no open resources between calls.
    """

    def __init__(self, host, user, pwd, database):
        self.host = host
        self.user = user
        self.pwd = pwd
        self.db = database

    def __GetConnect(self):
        """Open a connection and return a cursor.

        The connection object is stored on ``self.conn`` so the calling
        method can commit/close it.

        Raises:
            NameError: if no database name was configured, or if a
                cursor could not be obtained.
        """
        if not self.db:
            # BUGFIX: the original did `raise (NameError, "...")`, which in
            # Python 3 raises TypeError (a tuple is not an exception).
            raise NameError("没有设置数据库信息")
        self.conn = pymssql.connect(host=self.host, user=self.user, password=self.pwd, database=self.db, charset="utf8")
        cur = self.conn.cursor()
        if not cur:
            # Same tuple-raise bug fixed here.
            raise NameError("连接数据库失败")
        else:
            return cur

    def ExecQuery(self, sql):
        """Execute a query statement.

        Returns a list of tuples: one tuple per result row, one element
        per column.
        """
        cur = self.__GetConnect()
        cur.execute(sql)
        resList = cur.fetchall()

        # The connection must be closed once the query has completed.
        self.conn.close()
        return resList

    def ExecNonQuery(self, sql):
        """Execute a non-query statement (INSERT/UPDATE/DELETE) and commit."""
        cur = self.__GetConnect()
        cur.execute(sql)
        self.conn.commit()
        self.conn.close()
|
||
|
|
||
|
|
||
|
def get_model_by_ID(model_id):
    """Fetch and deserialize the stored model configuration for *model_id*.

    Queries ``Model_CFG`` on the ``alert`` database and returns the parsed
    JSON of the first matching row's ``Model_info`` column.

    Raises:
        ValueError: if *model_id* is not integer-like.
        IndexError: if no row matches *model_id*.
    """
    # NOTE(review): credentials are hard-coded; move them to configuration.
    ms = MSSQL(host="192.168.10.250", user="sa", pwd="powerSIS#123", database="alert")
    # SECURITY: coerce to int before interpolating so a crafted model_id
    # string cannot inject SQL into the concatenated query.
    resList = ms.ExecQuery("SELECT Model_info FROM [alert].[dbo].[Model_CFG] where \"model_id\"=" + str(int(model_id)))
    return json.loads(resList[0][0])
|
||
|
|
||
|
|
||
|
def pca(model, LockVariable, Data_origin):
    """PCA-based fault detection with LARS/reconstruction fault isolation.

    Parameters
    ----------
    model : dict
        Trained model parameters: "Train_X_mean"/"Train_X_std" (scaling),
        "featValue"/"featVec" (covariance eigenvalues and eigenvectors),
        "K" (number of retained principal components), and the 99%
        control limits "T2CUL_99", "QCUL_99", "Kesi_99".
    LockVariable :
        Variables excluded from isolation; forwarded unchanged to
        ``Lars`` and the reconstruction helpers.
    Data_origin : ndarray
        Test samples in raw units; rows are samples, columns variables.

    Returns
    -------
    dict
        Keys: 'sampleData', 'reconData', 'errorData', 'R' (mean per-column
        cosine similarity between raw and reconstructed data), 'SPE'
        (NOTE(review): actually filled with the combined-index series, not
        the SPE series — confirm downstream expectations), 'paraState'
        (per-sample 0/1 fault flag per variable).
    """
    # Standardize with the training statistics.
    Data = (Data_origin - model["Train_X_mean"]) / model["Train_X_std"]
    featValue = np.array(model["featValue"])  # training eigenvalues
    featVec = np.array(model["featVec"])      # training eigenvectors
    k = model["K"]                            # number of principal components
    selectVec = featVec[:, 0:k]               # retained loading vectors
    featValue_sort = featValue                # assumed pre-sorted by the trainer

    # ---------------- SPE (squared prediction error) ----------------
    numbel_variable = featValue.shape[0]
    # Residual-subspace projection: I - P P^T.
    C_ = np.eye(numbel_variable) - np.dot(selectVec, selectVec.T)
    X_SPE = C_.T
    # (Dead T2 scaffolding and SPE diag matrices used only by commented-out
    # code were removed; they had no effect on the results.)

    # ---------------- combined index (phi) ----------------
    II = featValue_sort.copy()
    II[:int(model["K"])] = II[:int(model["K"])] * model["T2CUL_99"]
    II[int(model["K"]):] = model["QCUL_99"]
    DIAG_Fai = np.linalg.inv(np.diag(II))
    D_Fai = featVec.copy()
    X_Fai = np.dot(D_Fai, np.linalg.cholesky(DIAG_Fai)).T

    # ---------------- LARS parameters ----------------
    t = 50000
    limit_line = model["Kesi_99"]  # combined-index control limit (was *1)

    beta_path = []
    SPE_list = []
    FAI_list = []
    paraState = np.zeros([np.array(Data_origin).shape[0], np.array(Data_origin).shape[1]])
    # Cap on how many variables may be flagged per sample before falling
    # back to full reconstruction-based diagnosis.
    if Data.shape[1] >= 12:
        para_length = 3
    elif 12 > Data.shape[1] >= 7:
        para_length = 2
    else:
        para_length = 1
    plots_matrix = []  # reconstructions produced by the contribution-plot path
    plots_index = []   # sample indices handled by the contribution-plot path

    for i in range(Data.shape[0]):
        Y = Data[i, :]  # one standardized test sample
        # SPE statistic for this sample.
        SPE_list.append(np.dot(Y, C_).dot(Y.T))
        # Combined index; computed once and reused (the original recomputed
        # it and contained leftover debug prints, both removed).
        fai = np.dot(Y.T, D_Fai).dot(DIAG_Fai).dot(D_Fai.T).dot(Y)
        FAI_list.append(fai)
        # LARS-based candidate fault isolation.
        beta, mse = Lars(X_Fai, Y, D_Fai, DIAG_Fai, t, limit_line, LockVariable)
        beta_end = abs(beta[-1, :])
        # Candidate set: every variable with a nonzero coefficient.  (The
        # original also computed unused jenkspy breaks here; removed.)
        index = np.where(beta_end > 0)[0]
        if len(index) > para_length:
            # Too many candidates flagged by LARS: refine by reconstruction.
            res = recon_fault_diagnosis_r_c(Y, D_Fai @ DIAG_Fai @ D_Fai.T, limit_line,
                                            list(zip(index, beta_end[index])), model,
                                            True, X_SPE @ X_SPE.T, LockVariable, selectVec, rbc=None)
            if not isinstance(res[0], list):
                # res[0] is an array when reconstruction succeeded ([] otherwise).
                # res[1] == "plot" marks the contribution-plot fallback; else it
                # is the index array of faulty variables.
                if res[1] == "plot":
                    plots_matrix.append(res[0])
                    plots_index.append(i)
                else:
                    beta[-1, :], index = res[0].T, res[1]
        elif len(index) <= para_length and len(index) != 0:
            res = recon_fault_diagnosis_r_l(Y, D_Fai @ DIAG_Fai @ D_Fai.T, index)
            beta[-1, :], index = res[0].T, res[1]
        paraState[i, index] = 1
        # Zero out coefficients of variables not confirmed as faulty.
        beta_new = beta[-1, :] * paraState[i, :]
        beta_path.append(beta_new)
    beta_path = np.array(beta_path)

    # ------------- reconstruction in raw units -------------
    finalData = Data - beta_path
    reconData = np.add(np.multiply(finalData, model["Train_X_std"]), model["Train_X_mean"])
    if plots_index:
        # Samples diagnosed by the contribution-plot path carry their own
        # reconstruction; substitute it in.
        reconData[plots_index] = plots_matrix
    errorData = Data_origin - reconData  # residual between raw and reconstructed

    # Mean per-column cosine similarity between raw and reconstructed data.
    R = 0
    for col in range(reconData.shape[1]):
        vector1 = Data_origin[:, col]
        vector2 = np.array(reconData)[:, col]
        R += np.dot(vector1, vector2.T) / (np.sqrt(np.sum(vector1 ** 2)) * np.sqrt(np.sum(vector2 ** 2)))
    R /= reconData.shape[1]

    items = [('sampleData', np.transpose(np.array(Data_origin)).tolist()),
             ('reconData', reconData.T.tolist()),
             ('errorData', errorData.T.tolist()),
             ('R', R.tolist()),
             ('SPE', FAI_list),
             ('paraState', paraState.tolist())]
    return dict(items)
|
||
|
|
||
|
|
||
|
def get_history_value(points, time, interval):
    """Download sampled history values for every point over every span.

    ``points`` is a comma-separated tag list, ``time`` a semicolon-separated
    list of "start,end" pairs, and ``interval`` the sampling period passed
    straight through to the service.  Returns an ndarray with one column per
    (point, span) request and one row per sample.
    """
    url = "http://10.35.2.238:9000/exawebapi/exatime/GetSamplingValueArrayFloat"
    headers = {"Content-Type": "application/json;charset=utf-8"}

    columns = []
    for tag in points.split(","):
        for span in time.split(";"):
            bounds = span.split(",")
            query = {
                "ItemName": tag,
                "StartingTime": bounds[0],
                "TerminalTime": bounds[1],
                "SamplingPeriod": interval,
            }
            reply = requests.get(url, headers=headers, params=query)
            # The service wraps its JSON array in an extra pair of quotes;
            # strip those before parsing.
            payload = json.loads(reply.text.replace('"[', '[').replace(']"', ']'))
            # Keep element [1] of each row — presumably the sampled value,
            # with [0] being the timestamp (confirm against the API).
            columns.append([sample[1] for sample in payload])
    return np.transpose(np.array(columns))
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":

    # Demo entry point: a hard-coded request payload describing the test
    # window, the points to pull from the historian, and the model/version
    # to evaluate.
    info_str = '{"Test_Data":{"time":"2021-07-18 10:34:16,2021-07-19 10:34:16","points":"JL_D2_20DAS05A:LBS60CP101.PNT,JL_D2_20DAS06A:LCJ60CT301.PNT,JL_D2_20DAS06A:LCA30CT305.PNT,JL_D2_20DAS06A:LCA30CT303.PNT,JL_D2_20DAS06A:LCA30CT304.PNT,JL_D2_20DAS06A:LCJ51CT301.PNT","interval":300000,"AddBias":[0.0,0.0,0.0,0.0,0.0,0.0],"AddBias_Time":"2021-07-18 10:34:16,2021-07-19 10:34:16"},"Model_id":770,"version":"v-2021-04-02 10:26:32"}'

    info = json.loads(info_str)

    model_id = info["Model_id"]

    version = info["version"]

    # Local project module providing model-lookup helpers.
    import PCA_Test

    if version == "v-test":

        res = PCA_Test.get_model_by_ID(model_id, version)

    else:

        res = PCA_Test.get_model_by_id_and_version(model_id, version)

    filename = res["algorithm"]

    if filename == "PCA":

        lock = []

        point_info = res["pointInfo"]

        # Collect the indices of points flagged as locked in the model config.
        for i in range(len(point_info)):

            try:

                if point_info[i]["lock"]:

                    lock.append(i)

            # NOTE(review): bare except silently skips entries without a
            # "lock" key (or any other failure) — narrow to KeyError?
            except:

                continue

        # NOTE(review): this reset discards the locked indices collected
        # above, so pca() always runs with no locked variables — looks like
        # a leftover debug override; confirm intent before removing.
        lock = []

        Test_Data = info["Test_Data"]

        points = Test_Data["points"]

        time1 = Test_Data["time"]

        interval = Test_Data["interval"]

        model = res["para"]["Model_info"]

        # Pull raw history samples, then run and time the PCA diagnosis.
        Data = get_history_value(points, time1, interval)

        t1 = time.time()

        result = pca(model, lock, Data)

        t2 = time.time()

        print(t2 - t1)

        # print(result)
|