# 模型蒸餾的程式碼 (model distillation code)
#
# 15375357604 發表於 2024-06-21 (published by 15375357604, 2024-06-21)
# 模型蒸餾 (model distillation)
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.metrics import roc_auc_score
import numpy as np
import random

# Fix the Python and NumPy random seeds for reproducibility.
seed_value = 42
np.random.seed(seed_value)
random.seed(seed_value)

# Fix TensorFlow's global random seed as well.
tf.random.set_seed(seed_value)


def action_recall_accuracy(y_pred, y_true):
    """Print per-class recall, precision and F1, plus the macro-averaged F1.

    Args:
        y_pred: sequence of predicted integer class labels.
        y_true: sequence of ground-truth integer class labels.

    Returns:
        None (results are printed).

    BUG FIX: the original printed recall (cm[i,i] / row sum) under the label
    "準確率" (accuracy) and precision (cm[i,i] / column sum) under "召回率"
    (recall) — the labels were swapped. It also recomputed f1_score over the
    whole dataset once per class, and divided by zero for classes missing
    from y_true or y_pred. All three are corrected; metric values for the
    normal case are unchanged.
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)

    # Confusion matrix: rows = true class, columns = predicted class.
    # Classes are the sorted union of labels seen in either sequence.
    labels = np.union1d(np.unique(y_true), np.unique(y_pred))
    num_classes = len(labels)
    label_to_idx = {label: i for i, label in enumerate(labels)}
    cm = np.zeros((num_classes, num_classes), dtype=np.int64)
    for t, p in zip(y_true, y_pred):
        cm[label_to_idx[t], label_to_idx[p]] += 1

    diag = np.diag(cm).astype(float)
    row_sums = cm.sum(axis=1)  # actual sample count per class
    col_sums = cm.sum(axis=0)  # predicted sample count per class

    # Guard against division by zero for classes absent from either side.
    recall = np.divide(diag, row_sums,
                       out=np.zeros(num_classes), where=row_sums > 0)
    precision = np.divide(diag, col_sums,
                          out=np.zeros(num_classes), where=col_sums > 0)

    for i in range(num_classes):
        print(f"類別 {i} 的召回率: {recall[i]:.3f}")
        print(f"類別 {i} 的精確率: {precision[i]:.3f}")

    # Per-class F1: harmonic mean of precision and recall (0 when both are 0).
    denom = precision + recall
    f1 = np.divide(2.0 * precision * recall, denom,
                   out=np.zeros(num_classes), where=denom > 0)

    for i in range(num_classes):
        print(f"類別 {i} 的F1分數: {f1[i]:.3f}")

    # Macro-averaged F1 across all classes.
    average_f1 = float(f1.mean()) if num_classes else 0.0
    print(f"各類別F1-score的平均值: {average_f1:.3f}")


# ---- Data loading and preparation ----

# Load the training file; the 'label' column is the target, everything
# else is a feature.
train_data = pd.read_csv('train_new.csv')
X = train_data.drop('label', axis=1).astype('float32')
y = train_data['label'].astype('int32')

# Hold out a validation split. test_size=0.001 keeps nearly every row
# in the training set; the tiny validation split is not used below.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.001, random_state=42)

# Load the test file and separate its features and labels the same way.
test_data = pd.read_csv('test_new.csv')
X_test = test_data.drop('label', axis=1).astype('float32')
true_labels = test_data['label'].astype('int32')


def custom_loss(y_true, y_pred, soft_labels, T, rate):
    """Distillation loss: rate * hard BCE + (1 - rate) * soft BCE.

    Args:
        y_true: ground-truth labels, reshaped to (batch, 1) for the BCE.
        y_pred: student sigmoid probabilities, shape (batch, 1).
        soft_labels: teacher sigmoid probabilities, shape (batch, 1).
        T: distillation temperature.
        rate: weight of the hard-label loss; (1 - rate) weights the soft loss.

    BUG FIX: the original applied tf.nn.softmax over the last axis of a
    (batch, 1) tensor, which is identically 1.0, so the soft loss was a
    constant and the distillation term had no effect. For a Bernoulli
    output, temperature scaling is applied to the probability directly:
    p_T = p^(1/T) / (p^(1/T) + (1-p)^(1/T)), which equals
    sigmoid(logit / T) when p = sigmoid(logit). The no-op Lambda layer
    wrappers are also removed.
    """
    eps = tf.keras.backend.epsilon()

    def _temper(p):
        # Temperature-scale a Bernoulli probability; clip so tf.pow and the
        # division stay numerically stable at p near 0 or 1.
        p = tf.clip_by_value(p, eps, 1.0 - eps)
        pos = tf.pow(p, 1.0 / T)
        neg = tf.pow(1.0 - p, 1.0 / T)
        return pos / (pos + neg)

    y_pred_T = _temper(y_pred)
    soft_labels_T = _temper(soft_labels)

    # Hard loss: BCE of the student against the ground-truth labels.
    hard_loss = tf.keras.losses.binary_crossentropy(
        tf.reshape(y_true, (-1, 1)), y_pred)

    # Soft loss: BCE of the tempered student against the tempered teacher,
    # scaled by T^2 to keep gradient magnitudes comparable across T.
    soft_loss = tf.keras.losses.binary_crossentropy(
        soft_labels_T, y_pred_T) * (T ** 2)

    return hard_loss * rate + soft_loss * (1 - rate)


# Identical Adam settings for student and teacher; each optimizer keeps
# its own moment/step state.
_adam_kwargs = dict(learning_rate=0.001, beta_1=0.9, beta_2=0.999,
                    epsilon=1e-07, amsgrad=True, name='Adam')

optimizer = tf.keras.optimizers.Adam(**_adam_kwargs)          # student
optimizer_teacher = tf.keras.optimizers.Adam(**_adam_kwargs)  # teacher


@tf.function
def train_step(batch_X_student, batch_X_teacher, model_student, model_teacher, batch_y, T, rate):
    # One joint optimization step: the student is updated with the
    # distillation loss and the teacher with plain BCE on the hard labels.
    # Returns (student loss, teacher loss) as per-sample loss tensors.
    with tf.GradientTape(persistent=True) as tape:
        predictions = model_student(batch_X_student, training=True)
        # NOTE(review): the teacher runs with training=True here, so the same
        # forward pass both produces the soft labels and is itself trained —
        # the teacher learns online rather than being pre-trained. Confirm
        # this is intended.
        batch_soft_labels = model_teacher(batch_X_teacher, training=True)
        loss = custom_loss(batch_y, predictions, batch_soft_labels, T=T, rate=rate)
        teacher_loss = tf.keras.losses.binary_crossentropy(tf.reshape(batch_y, (-1, 1)), batch_soft_labels)

    # Persistent tape: two independent gradient computations from one recording.
    gradients = tape.gradient(loss, model_student.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model_student.trainable_variables))

    gradients_teacher = tape.gradient(teacher_loss, model_teacher.trainable_variables)
    optimizer_teacher.apply_gradients(zip(gradients_teacher, model_teacher.trainable_variables))

    del tape  # drop the persistent tape to release its resources
    return loss, teacher_loss


def build_student_model():
    """Student network: 60 input features -> Dense 128/64/32 (relu) -> sigmoid."""
    inputs = tf.keras.layers.Input(shape=(60,), name='relevance_input', dtype='float32')
    hidden = inputs
    # Stack of fully-connected ReLU layers of decreasing width.
    for units in (128, 64, 32):
        hidden = tf.keras.layers.Dense(units, activation='relu')(hidden)
    probability = tf.keras.layers.Dense(1, activation='sigmoid', name='student_output')(hidden)
    return tf.keras.Model(inputs=inputs, outputs=probability)


def build_teacher_model():
    """Teacher network: 124 input features -> Dense 128/64/32 (relu) -> sigmoid."""
    inputs = tf.keras.layers.Input(shape=(124,), name='relevance_input', dtype='float32')
    hidden = inputs
    # Same trunk as the student; only the input width differs.
    for units in (128, 64, 32):
        hidden = tf.keras.layers.Dense(units, activation='relu')(hidden)
    probability = tf.keras.layers.Dense(1, activation='sigmoid', name='teacher_output')(hidden)
    return tf.keras.Model(inputs=inputs, outputs=probability)


class DataGenerator(tf.keras.utils.Sequence):
    """Mini-batch source yielding (student features, teacher features, labels).

    The student sees only the first 60 feature columns; the teacher sees
    the full feature set. Batches are served in order, without shuffling.
    """

    def __init__(self, X_data, y_data, batch_size):
        self.X_data_student = X_data.iloc[:, :60]
        self.X_data_teacher = X_data
        self.y_data = y_data
        self.batch_size = batch_size

    def __len__(self):
        # Number of batches per epoch, counting a final partial batch.
        full, remainder = divmod(len(self.y_data), self.batch_size)
        return full + (1 if remainder else 0)

    def __getitem__(self, index):
        start = index * self.batch_size
        stop = start + self.batch_size
        return (
            self.X_data_student[start:stop].to_numpy(),
            self.X_data_teacher[start:stop].to_numpy(),
            self.y_data[start:stop].to_numpy(),
        )


# ---- Grid search over temperature T and hard/soft loss mix rate ----

# Build the batch generator over the training split.
train_generator = DataGenerator(X_train, y_train, batch_size=16)

# Build the teacher and student models once.
student_model = build_student_model()
teacher_model = build_teacher_model()

# Candidate values for the distillation temperature T and for the weight
# of the hard-label loss (rate).
# NOTE(review): the models and optimizers are created once, OUTSIDE the grid
# loops — each (T, rate) combination keeps training the same weights, so the
# grid-search runs are not independent of each other. Confirm intended.
t_list = [1, 2, 3, 4, 5, 6, 7,8,0.5]
rate_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
for t in t_list:
    for rate in rate_list:
        print("當前的引數T和rate值為:", t, rate)
        for epoch in range(11):
            print("...............................epoch:", epoch)
            # Train over every batch produced by the custom generator.
            for batch_X_student, batch_X_teacher, batch_y in train_generator:
                loss, teacher_loss = train_step(batch_X_student, batch_X_teacher, student_model, teacher_model, batch_y, t,
                                                rate)

            # Evaluate the student on the test set after each epoch; the
            # student only consumes the first 60 feature columns.
            predictions = student_model.predict(X_test.iloc[:, :60])
            auc = roc_auc_score(list(np.array(true_labels)), predictions[:, 0])
            print(f"Testing AUC: {auc}")

            # Threshold probabilities at 0.5 and print per-class metrics.
            pred_labels = [int(i > 0.5) for i in predictions[:, 0]]
            true_labels_list = list(np.array(true_labels))
            action_recall_accuracy(pred_labels, true_labels_list)

            # Persist the student after the last epoch of each combination.
            # NOTE(review): the save path is identical for every (T, rate)
            # pair, so each combination overwrites the previous saved model.
            if epoch == 10:
                student_model.save(f"./dnn2_student_model", save_format='tf')

        print("||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||")

# 相關文章 (related articles)