基于Numpy的反向传播算法(Python)

全连接神经网络训练MNIST数据集

Python示例代码(使用PyTorch实现)

        import torch
        import torch.nn as nn
        import torch.optim as optim
        from torch.utils.data import DataLoader, random_split
        from torchvision import datasets, transforms
        import matplotlib.pyplot as plt

        # Seed PyTorch's global RNG so that runs are reproducible
        torch.manual_seed(42)

        # Hyperparameters
        # Flattened size of one MNIST image (28x28 pixels)
        input_size = 28 * 28
        # Number of neurons in the hidden layer
        hidden_size = 128
        # Number of output classes (digits 0-9)
        output_size = 10
        # Step size passed to the optimizer
        learning_rate = 1e-3
        # Samples per mini-batch
        batch_size = 64
        # Number of passes over the training split
        num_epochs = 10

        # Load the MNIST training set; each image is converted to a tensor and
        # then normalised with mean 0.5 and std 0.5.
        transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(mean=(0.5,), std=(0.5,)),
            ]
        )
        dataset = datasets.MNIST(
            './python_mnist',
            train=True,
            download=True,
            transform=transform,
        )

        # Split into training and validation subsets (9:1)
        train_size = int(0.9 * len(dataset))
        val_size = len(dataset) - train_size
        train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

        # Wrap both subsets in DataLoaders; only the training data is shuffled
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        # Multilayer perceptron (MLP) model
        class MLP(nn.Module):
            """Fully connected network with a single ReLU hidden layer.

            Args:
                input_size: Number of features per sample after flattening.
                hidden_size: Number of units in the hidden layer.
                output_size: Number of output values produced per sample.
            """

            def __init__(self, input_size: int, hidden_size: int, output_size: int) -> None:
                super().__init__()
                # Layers are created in the same order as before so that seeded
                # parameter initialisation stays reproducible.
                self.fc1 = nn.Linear(input_size, hidden_size)   # input -> hidden
                self.relu = nn.ReLU()                           # activation
                self.fc2 = nn.Linear(hidden_size, output_size)  # hidden -> output

            def forward(self, x: torch.Tensor) -> torch.Tensor:
                """Flatten each sample and return the output of the final linear layer.

                Args:
                    x: Batch tensor whose first dimension is the batch size; all
                        remaining dimensions are flattened into one feature axis.

                Returns:
                    Tensor of shape ``(x.size(0), output_size)``.
                """
                # reshape (rather than view) also accepts non-contiguous input,
                # e.g. a transposed or sliced batch, copying only when required.
                x = x.reshape(x.size(0), -1)
                return self.fc2(self.relu(self.fc1(x)))

        # Build the model, the loss function and the optimizer
        model = MLP(
            input_size=input_size,
            hidden_size=hidden_size,
            output_size=output_size,
        )
        criterion = nn.CrossEntropyLoss()  # cross-entropy loss
        optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)  # Adam optimizer

        # Per-epoch history of losses and validation accuracy
        train_losses, val_losses, val_accuracies = [], [], []

        # Training loop: each epoch runs one optimisation pass over train_loader
        # and then one evaluation pass over val_loader.
        for epoch in range(num_epochs):
            model.train()  # switch to training mode
            running_loss = 0.0
            train_total = 0
            for images, labels in train_loader:
                # Forward pass
                outputs = model(images)
                loss = criterion(outputs, labels)

                # Backward pass and parameter update
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # The criterion yields a per-batch mean (default reduction of
                # nn.CrossEntropyLoss), so weight it by the batch size; otherwise
                # a smaller final batch would be over-represented in the average.
                batch_count = labels.size(0)
                running_loss += loss.item() * batch_count
                train_total += batch_count

            # Sample-weighted mean training loss for this epoch
            train_loss = running_loss / train_total
            train_losses.append(train_loss)

            # Evaluation on the validation split
            model.eval()  # switch to evaluation mode
            val_loss = 0.0
            correct = 0
            total = 0
            with torch.no_grad():  # no gradients are needed while evaluating
                for images, labels in val_loader:
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    batch_count = labels.size(0)
                    val_loss += loss.item() * batch_count

                    # Predicted class = index of the largest output per sample;
                    # `.data` is unnecessary inside torch.no_grad().
                    _, predicted = torch.max(outputs, 1)
                    total += batch_count
                    correct += (predicted == labels).sum().item()

            # Sample-weighted mean validation loss and accuracy in percent
            val_loss = val_loss / total
            val_accuracy = 100 * correct / total
            val_losses.append(val_loss)
            val_accuracies.append(val_accuracy)

            # Report the metrics of this epoch
            print(f'Epoch [{epoch+1}/{num_epochs}], '
                  f'Train Loss: {train_loss:.4f}, '
                  f'Val Loss: {val_loss:.4f}, '
                  f'Val Accuracy: {val_accuracy:.2f}%')

        # Plot the loss and accuracy histories side by side in a single figure
        fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

        # Left panel: training and validation loss
        loss_ax.plot(train_losses, label='Train Loss')
        loss_ax.plot(val_losses, label='Val Loss')
        loss_ax.set_xlabel('Epoch')
        loss_ax.set_ylabel('Loss')
        loss_ax.set_title('Loss Curve')
        loss_ax.legend()

        # Right panel: validation accuracy
        acc_ax.plot(val_accuracies, label='Val Accuracy', color='orange')
        acc_ax.set_xlabel('Epoch')
        acc_ax.set_ylabel('Accuracy')
        acc_ax.set_title('Accuracy Curve')
        acc_ax.legend()

        plt.show()