import numpy as np
from numpy.random import randn
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
# 加载真实的Iris数据集
iris = load_iris()
X = iris.data
y = iris.target
# 数据标准化
scaler = StandardScaler()
X = scaler.fit_transform(X)
# 将标签转换为one-hot编码
def to_onehot(y, num_classes):
onehot = np.zeros((len(y), num_classes))
onehot[np.arange(len(y)), y] = 1
return onehot
y_onehot = to_onehot(y, 3)
# 设置网络参数
N, D_in, H, D_out = 150, 4, 20, 3
# 使用真实数据
x, y_true = X, y_onehot
print("特征数据形状:", x.shape)
print("标签数据形状:", y_true.shape)
# 改进的权重初始化 - 使用Xavier初始化
def xavier_init(n_in, n_out):
return np.random.randn(n_in, n_out) * np.sqrt(2.0 / (n_in + n_out))
w1 = xavier_init(D_in, H)
w2 = xavier_init(H, D_out)
print("w1权重形状:", w1.shape)
print("w2权重形状:", w2.shape)
# 训练过程
lsloss = []
accuracies = []
for t in range(500):
# 前向传播
z1 = x.dot(w1)
h = 1 / (1 + np.exp(-z1)) # sigmoid激活
y_pred = h.dot(w2)
# 计算损失 - 使用交叉熵损失(更稳定)
# 添加softmax确保数值稳定性
y_pred_softmax = np.exp(y_pred - np.max(y_pred, axis=1, keepdims=True))
y_pred_softmax = y_pred_softmax / np.sum(y_pred_softmax, axis=1, keepdims=True)
# 交叉熵损失
loss = -np.sum(y_true * np.log(y_pred_softmax + 1e-8)) / N
lsloss.append(loss)
# 计算准确率
pred_classes = np.argmax(y_pred, axis=1)
true_classes = y # 直接使用原始标签
accuracy = np.mean(pred_classes == true_classes)
accuracies.append(accuracy)
# 反向传播 - 使用交叉熵的梯度
grad_y_pred = (y_pred_softmax - y_true) / N # 交叉熵梯度
grad_w2 = h.T.dot(grad_y_pred)
grad_h = grad_y_pred.dot(w2.T)
grad_w1 = x.T.dot(grad_h * h * (1 - h))
# 梯度裁剪 - 防止梯度爆炸
clip_value = 1.0
grad_w1 = np.clip(grad_w1, -clip_value, clip_value)
grad_w2 = np.clip(grad_w2, -clip_value, clip_value)
# 使用更小的学习率
w1 -= 1e-1 * grad_w1 # 调整学习率
w2 -= 1e-1 * grad_w2
if t % 50 == 0:
print(f"Iteration {t}: Loss = {loss:.4f}, Accuracy = {accuracy:.4f}")
# 绘制结果
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(lsloss)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Loss Chart')
plt.grid(True)
plt.subplot(1, 2, 2)
plt.plot(accuracies)
plt.xlabel('Iteration')
plt.ylabel('Accuracy')
plt.title('Accuracy Chart')
plt.grid(True)
plt.tight_layout()
plt.show()
# 最终评估
final_pred = np.argmax(y_pred, axis=1)
final_accuracy = np.mean(final_pred == y)
print(f"\n最终准确率: {final_accuracy:.4f}")