import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
# Set a random seed for reproducibility
torch.manual_seed(42)
# Define hyperparameters
input_size = 28 * 28  # MNIST images are 28x28 pixels
hidden_size = 128     # number of neurons in the hidden layer
output_size = 10      # number of output classes (digits 0-9)
learning_rate = 0.001
batch_size = 64
num_epochs = 10
# Load the MNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
dataset = datasets.MNIST(root='./python_mnist', train=True, transform=transform, download=True)
# Split into training and validation sets (9:1)
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
# Create the DataLoaders
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
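
# Optional sanity check (illustrative addition, not part of the original script): peek at one
# validation batch to confirm the expected tensor shapes before training. The names
# _images/_labels are temporary and only used for this check.
_images, _labels = next(iter(val_loader))
print(_images.shape, _labels.shape)  # expected: torch.Size([64, 1, 28, 28]) torch.Size([64])
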
# Define the multilayer perceptron (MLP) model
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)   # input layer -> hidden layer
        self.relu = nn.ReLU()                           # activation function
        self.fc2 = nn.Linear(hidden_size, output_size)  # hidden layer -> output layer

    def forward(self, x):
        x = x.view(x.size(0), -1)  # flatten each 28x28 image into a 784-dim vector
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out
# Initialize the model, loss function, and optimizer
model = MLP(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = optim.Adam(model.parameters(), lr=learning_rate)  # Adam optimizer
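
# Optional sanity check (illustrative addition): one forward pass on a dummy batch should
# produce one logit per class, i.e. shape [batch, output_size]. Zeros are used so the global
# RNG state, and hence reproducibility, is left untouched.
with torch.no_grad():
    assert model(torch.zeros(2, 1, 28, 28)).shape == (2, output_size)
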
# Track losses and accuracy
train_losses = []
val_losses = []
val_accuracies = []
# Training loop
for epoch in range(num_epochs):
    model.train()  # switch to training mode
    running_loss = 0.0
    for images, labels in train_loader:
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Average training loss over all batches
    train_loss = running_loss / len(train_loader)
    train_losses.append(train_loss)

    # Evaluate on the validation set
    model.eval()  # switch to evaluation mode
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)  # class with the highest logit
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # Average validation loss and accuracy
    val_loss = val_loss / len(val_loader)
    val_accuracy = 100 * correct / total
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)

    # Print training and validation results
    print(f'Epoch [{epoch+1}/{num_epochs}], '
          f'Train Loss: {train_loss:.4f}, '
          f'Val Loss: {val_loss:.4f}, '
          f'Val Accuracy: {val_accuracy:.2f}%')
# Plot the loss and accuracy curves
plt.figure(figsize=(12, 5))
# Loss curves
plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Val Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss Curve')
plt.legend()
# Accuracy curve
plt.subplot(1, 2, 2)
plt.plot(val_accuracies, label='Val Accuracy', color='orange')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.title('Accuracy Curve')
plt.legend()
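plt.tight_layout()  # keep the two subplots and their labels from overlapping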
plt.show()
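
# Optional follow-up (a minimal sketch, not prescribed by the original script): persist the
# trained weights so the model can be reloaded later without retraining. The file name
# 'mlp_mnist.pt' is an arbitrary choice for illustration.
torch.save(model.state_dict(), 'mlp_mnist.pt')
# To reload: model.load_state_dict(torch.load('mlp_mnist.pt')); model.eval()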